pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/column.py +41 -29
  5. pixeltable/catalog/globals.py +18 -0
  6. pixeltable/catalog/insertable_table.py +30 -10
  7. pixeltable/catalog/table.py +198 -86
  8. pixeltable/catalog/table_version.py +47 -53
  9. pixeltable/catalog/table_version_path.py +2 -2
  10. pixeltable/catalog/view.py +17 -18
  11. pixeltable/dataframe.py +27 -36
  12. pixeltable/env.py +7 -0
  13. pixeltable/exec/__init__.py +0 -1
  14. pixeltable/exec/aggregation_node.py +6 -3
  15. pixeltable/exec/cache_prefetch_node.py +189 -43
  16. pixeltable/exec/data_row_batch.py +5 -22
  17. pixeltable/exec/exec_context.py +2 -2
  18. pixeltable/exec/exec_node.py +3 -2
  19. pixeltable/exec/expr_eval_node.py +23 -16
  20. pixeltable/exec/in_memory_data_node.py +6 -3
  21. pixeltable/exec/sql_node.py +24 -25
  22. pixeltable/exprs/arithmetic_expr.py +12 -5
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +97 -14
  26. pixeltable/exprs/comparison.py +10 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +27 -18
  29. pixeltable/exprs/expr.py +53 -52
  30. pixeltable/exprs/expr_set.py +5 -0
  31. pixeltable/exprs/function_call.py +32 -16
  32. pixeltable/exprs/globals.py +4 -1
  33. pixeltable/exprs/in_predicate.py +8 -7
  34. pixeltable/exprs/inline_expr.py +4 -4
  35. pixeltable/exprs/is_null.py +4 -4
  36. pixeltable/exprs/json_mapper.py +11 -12
  37. pixeltable/exprs/json_path.py +6 -11
  38. pixeltable/exprs/literal.py +5 -5
  39. pixeltable/exprs/method_ref.py +5 -4
  40. pixeltable/exprs/object_ref.py +2 -1
  41. pixeltable/exprs/row_builder.py +88 -36
  42. pixeltable/exprs/rowid_ref.py +12 -11
  43. pixeltable/exprs/similarity_expr.py +12 -7
  44. pixeltable/exprs/sql_element_cache.py +7 -5
  45. pixeltable/exprs/type_cast.py +8 -6
  46. pixeltable/exprs/variable.py +5 -4
  47. pixeltable/func/aggregate_function.py +9 -9
  48. pixeltable/func/expr_template_function.py +6 -5
  49. pixeltable/func/function.py +11 -10
  50. pixeltable/func/udf.py +6 -11
  51. pixeltable/functions/__init__.py +2 -2
  52. pixeltable/functions/globals.py +5 -7
  53. pixeltable/functions/huggingface.py +155 -45
  54. pixeltable/functions/llama_cpp.py +107 -0
  55. pixeltable/functions/mistralai.py +1 -1
  56. pixeltable/functions/ollama.py +147 -0
  57. pixeltable/functions/openai.py +1 -1
  58. pixeltable/functions/replicate.py +72 -0
  59. pixeltable/functions/string.py +9 -0
  60. pixeltable/functions/together.py +1 -1
  61. pixeltable/functions/util.py +5 -2
  62. pixeltable/globals.py +67 -26
  63. pixeltable/index/btree.py +16 -3
  64. pixeltable/index/embedding_index.py +4 -4
  65. pixeltable/io/__init__.py +1 -2
  66. pixeltable/io/fiftyone.py +178 -0
  67. pixeltable/io/globals.py +96 -2
  68. pixeltable/iterators/base.py +3 -2
  69. pixeltable/iterators/document.py +1 -1
  70. pixeltable/iterators/video.py +120 -63
  71. pixeltable/metadata/__init__.py +1 -1
  72. pixeltable/metadata/converters/convert_21.py +34 -0
  73. pixeltable/metadata/converters/util.py +45 -4
  74. pixeltable/metadata/notes.py +1 -0
  75. pixeltable/metadata/schema.py +8 -0
  76. pixeltable/plan.py +17 -15
  77. pixeltable/py.typed +0 -0
  78. pixeltable/store.py +7 -2
  79. pixeltable/tool/create_test_db_dump.py +1 -1
  80. pixeltable/tool/create_test_video.py +1 -1
  81. pixeltable/tool/embed_udf.py +1 -1
  82. pixeltable/tool/mypy_plugin.py +28 -5
  83. pixeltable/type_system.py +100 -36
  84. pixeltable/utils/coco.py +5 -5
  85. pixeltable/utils/documents.py +15 -1
  86. pixeltable/utils/formatter.py +12 -13
  87. pixeltable/utils/s3.py +6 -3
  88. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
  89. pixeltable-0.2.23.dist-info/RECORD +153 -0
  90. pixeltable/exec/media_validation_node.py +0 -43
  91. pixeltable-0.2.21.dist-info/RECORD +0 -148
  92. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
  93. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
  94. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/func/aggregate_function.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import abc
 import inspect
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type
+from typing import TYPE_CHECKING, Any, Callable, Optional
 
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
@@ -36,8 +36,8 @@ class AggregateFunction(Function):
     RESERVED_PARAMS = {ORDER_BY_PARAM, GROUP_BY_PARAM}
 
     def __init__(
-            self, aggregator_class: Type[Aggregator], self_path: str,
-            init_types: List[ts.ColumnType], update_types: List[ts.ColumnType], value_type: ts.ColumnType,
+            self, aggregator_class: type[Aggregator], self_path: str,
+            init_types: list[ts.ColumnType], update_types: list[ts.ColumnType], value_type: ts.ColumnType,
             requires_order_by: bool, allows_std_agg: bool, allows_window: bool):
         self.agg_cls = aggregator_class
         self.requires_order_by = requires_order_by
@@ -86,7 +86,7 @@ class AggregateFunction(Function):
             res += '\n\n' + inspect.getdoc(self.agg_cls.update)
         return res
 
-    def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.Expr':
+    def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.FunctionCall':
        from pixeltable import exprs
 
        # perform semantic analysis of special parameters 'order_by' and 'group_by'
@@ -128,7 +128,7 @@ class AggregateFunction(Function):
            order_by_clause=[order_by_clause] if order_by_clause is not None else [],
            group_by_clause=[group_by_clause] if group_by_clause is not None else [])
 
-    def validate_call(self, bound_args: Dict[str, Any]) -> None:
+    def validate_call(self, bound_args: dict[str, Any]) -> None:
        # check that init parameters are not Exprs
        # TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
        import pixeltable.exprs as exprs
@@ -146,10 +146,10 @@ class AggregateFunction(Function):
 def uda(
        *,
        value_type: ts.ColumnType,
-        update_types: List[ts.ColumnType],
-        init_types: Optional[List[ts.ColumnType]] = None,
+        update_types: list[ts.ColumnType],
+        init_types: Optional[list[ts.ColumnType]] = None,
        requires_order_by: bool = False, allows_std_agg: bool = True, allows_window: bool = False,
-) -> Callable[[Type[Aggregator]], AggregateFunction]:
+) -> Callable[[type[Aggregator]], AggregateFunction]:
    """Decorator for user-defined aggregate functions.
 
    The decorated class must inherit from Aggregator and implement the following methods:
@@ -171,7 +171,7 @@ def uda(
    if init_types is None:
        init_types = []
 
-    def decorator(cls: Type[Aggregator]) -> AggregateFunction:
+    def decorator(cls: type[Aggregator]) -> AggregateFunction:
        # validate type parameters
        num_init_params = len(inspect.signature(cls.__init__).parameters) - 1
        if num_init_params > 0:
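
Note: a minimal sketch of a UDA under the new annotations, mirroring the builtin aggregators in pixeltable/functions/globals.py (shown further below); the `sum_of_squares` name is hypothetical, and this assumes `uda` and `Aggregator` are exported from `pixeltable.func`:

```python
# A minimal UDA sketch using the new builtin-generic annotations
# (list[ts.ColumnType] instead of typing.List); names are hypothetical.
from typing import Optional

import pixeltable.func as func
import pixeltable.type_system as ts

@func.uda(value_type=ts.FloatType(nullable=True), update_types=[ts.FloatType(nullable=True)])
class sum_of_squares(func.Aggregator):
    def __init__(self):
        self.total: Optional[float] = None  # running sum of squares

    def update(self, val: Optional[float]) -> None:
        if val is None:
            return
        self.total = (self.total or 0.0) + val * val

    def value(self) -> Optional[float]:
        return self.total
```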
pixeltable/func/expr_template_function.py CHANGED
@@ -1,10 +1,11 @@
 import inspect
-from typing import Dict, Optional, Any
+from typing import Any, Optional
 
 import pixeltable
 import pixeltable.exceptions as excs
+
 from .function import Function
-from .signature import Signature, Parameter
+from .signature import Signature
 
 
 class ExprTemplateFunction(Function):
@@ -22,7 +23,7 @@ class ExprTemplateFunction(Function):
        self.param_exprs_by_name = {p.name: p for p in self.param_exprs}
 
        # verify default values
-        self.defaults: Dict[str, exprs.Literal] = {}  # key: param name, value: default value converted to a Literal
+        self.defaults: dict[str, exprs.Literal] = {}  # key: param name, value: default value converted to a Literal
        for param in signature.parameters.values():
            if param.default is inspect.Parameter.empty:
                continue
@@ -77,7 +78,7 @@ class ExprTemplateFunction(Function):
    def name(self) -> str:
        return self.self_name
 
-    def _as_dict(self) -> Dict:
+    def _as_dict(self) -> dict:
        if self.self_path is not None:
            return super()._as_dict()
        return {
@@ -87,7 +88,7 @@ class ExprTemplateFunction(Function):
        }
 
    @classmethod
-    def _from_dict(cls, d: Dict) -> Function:
+    def _from_dict(cls, d: dict) -> Function:
        if 'expr' not in d:
            return super()._from_dict(d)
        assert 'signature' in d and 'name' in d
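
Note: the defaults handled above are what get converted to `Literal`s when a template is built from a decorated function; a minimal sketch, assuming `expr_udf` is exported at the package level as `pxt.expr_udf` and that arithmetic over the parameter variables yields a valid Pixeltable expression:

```python
# Sketch (hypothetical names): the default for `factor` becomes an
# exprs.Literal entry in self.defaults when the template is constructed.
import pixeltable as pxt

@pxt.expr_udf
def scaled(x: float, factor: float = 2.0) -> float:
    return x * factor
```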
pixeltable/func/function.py CHANGED
@@ -3,12 +3,13 @@ from __future__ import annotations
 import abc
 import importlib
 import inspect
-from typing import Any, Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Optional
 
 import sqlalchemy as sql
 
-import pixeltable
+import pixeltable as pxt
 import pixeltable.type_system as ts
+
 from .globals import resolve_symbol
 from .signature import Signature
 
@@ -66,13 +67,13 @@ class Function(abc.ABC):
    def help_str(self) -> str:
        return self.display_name + str(self.signature)
 
-    def __call__(self, *args: Any, **kwargs: Any) -> 'pixeltable.exprs.Expr':
+    def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
        from pixeltable import exprs
        bound_args = self.signature.py_signature.bind(*args, **kwargs)
        self.validate_call(bound_args.arguments)
        return exprs.FunctionCall(self, bound_args.arguments)
 
-    def validate_call(self, bound_args: Dict[str, Any]) -> None:
+    def validate_call(self, bound_args: dict[str, Any]) -> None:
        """Override this to do custom validation of the arguments"""
        pass
 
@@ -121,7 +122,7 @@ class Function(abc.ABC):
        """Print source code"""
        print('source not available')
 
-    def as_dict(self) -> Dict:
+    def as_dict(self) -> dict:
        """
        Return a serialized reference to the instance that can be passed to json.dumps() and converted back
        to an instance with from_dict().
@@ -130,13 +131,13 @@ class Function(abc.ABC):
        classpath = f'{self.__class__.__module__}.{self.__class__.__qualname__}'
        return {'_classpath': classpath, **self._as_dict()}
 
-    def _as_dict(self) -> Dict:
+    def _as_dict(self) -> dict:
        """Default serialization: store the path to self (which includes the module path)"""
        assert self.self_path is not None
        return {'path': self.self_path}
 
    @classmethod
-    def from_dict(cls, d: Dict) -> Function:
+    def from_dict(cls, d: dict) -> Function:
        """
        Turn dict that was produced by calling as_dict() into an instance of the correct Function subclass.
        """
@@ -147,14 +148,14 @@ class Function(abc.ABC):
        return func_class._from_dict(d)
 
    @classmethod
-    def _from_dict(cls, d: Dict) -> Function:
+    def _from_dict(cls, d: dict) -> Function:
        """Default deserialization: load the symbol indicated by the stored symbol_path"""
        assert 'path' in d and d['path'] is not None
        instance = resolve_symbol(d['path'])
        assert isinstance(instance, Function)
        return instance
 
-    def to_store(self) -> Tuple[Dict, bytes]:
+    def to_store(self) -> tuple[dict, bytes]:
        """
        Serialize the function to a format that can be stored in the Pixeltable store
        Returns:
@@ -165,7 +166,7 @@ class Function(abc.ABC):
        raise NotImplementedError()
 
    @classmethod
-    def from_store(cls, name: Optional[str], md: Dict, binary_obj: bytes) -> Function:
+    def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
        """
        Create a Function instance from the serialized representation returned by to_store()
        """
pixeltable/func/udf.py CHANGED
@@ -1,9 +1,10 @@
 from __future__ import annotations
 
-from typing import List, Callable, Optional, overload, Any
+from typing import Any, Callable, Optional, overload
 
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
+
 from .callable_function import CallableFunction
 from .expr_template_function import ExprTemplateFunction
 from .function import Function
@@ -21,8 +22,6 @@ def udf(decorated_fn: Callable) -> Function: ...
 @overload
 def udf(
    *,
-    return_type: Optional[ts.ColumnType] = None,
-    param_types: Optional[List[ts.ColumnType]] = None,
    batch_size: Optional[int] = None,
    substitute_fn: Optional[Callable] = None,
    is_method: bool = False,
@@ -49,8 +48,6 @@ def udf(*args, **kwargs):
 
    # Decorator schema invoked with parentheses: @pxt.udf(**kwargs)
    # Create a decorator for the specified schema.
-    return_type = kwargs.pop('return_type', None)
-    param_types = kwargs.pop('param_types', None)
    batch_size = kwargs.pop('batch_size', None)
    substitute_fn = kwargs.pop('substitute_fn', None)
    is_method = kwargs.pop('is_method', None)
@@ -64,9 +61,7 @@ def udf(*args, **kwargs):
    def decorator(decorated_fn: Callable):
        return make_function(
            decorated_fn,
-            return_type,
-            param_types,
-            batch_size,
+            batch_size=batch_size,
            substitute_fn=substitute_fn,
            is_method=is_method,
            is_property=is_property,
@@ -79,7 +74,7 @@ def udf(*args, **kwargs):
 def make_function(
    decorated_fn: Callable,
    return_type: Optional[ts.ColumnType] = None,
-    param_types: Optional[List[ts.ColumnType]] = None,
+    param_types: Optional[list[ts.ColumnType]] = None,
    batch_size: Optional[int] = None,
    substitute_fn: Optional[Callable] = None,
    is_method: bool = False,
@@ -158,10 +153,10 @@ def make_function(
 def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
 
 @overload
-def expr_udf(*, param_types: Optional[List[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
+def expr_udf(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
 
 def expr_udf(*args: Any, **kwargs: Any) -> Any:
-    def make_expr_template(py_fn: Callable, param_types: Optional[List[ts.ColumnType]]) -> ExprTemplateFunction:
+    def make_expr_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> ExprTemplateFunction:
        if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
            # this is a named function in a module
            function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
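
Note: with `return_type` and `param_types` removed from the `@udf` overload, a UDF's signature now comes from its ordinary Python annotations (`make_function` still accepts explicit types for internal callers); a minimal sketch:

```python
# Sketch: parameter and return types are inferred from the annotations,
# so nothing needs to be passed to the decorator.
import pixeltable as pxt

@pxt.udf
def repeat(s: str, n: int) -> str:
    return s * n
```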
pixeltable/functions/__init__.py CHANGED
@@ -1,7 +1,7 @@
 from pixeltable.utils.code import local_public_names
 
-from . import (anthropic, audio, fireworks, huggingface, image, json, mistralai, openai, string, timestamp, together,
-               video, vision, whisper)
+from . import (anthropic, audio, fireworks, huggingface, image, json, llama_cpp, mistralai, ollama, openai, string,
+               timestamp, together, video, vision, whisper)
 from .globals import *
 
 __all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
pixeltable/functions/globals.py CHANGED
@@ -36,9 +36,7 @@ class sum(func.Aggregator):
        return self.sum
 
 
-# disable type checking: mypy doesn't seem to understand that 'sum' is an instance of Function
-# TODO: find a way to have this type-checked
-@sum.to_sql  # type: ignore
+@sum.to_sql
 def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
    # This can produce a Decimal. We are deliberately avoiding an explicit cast to a Bigint here, because that can
    # cause overflows in Postgres. We're instead doing the conversion to the target type in SqlNode.__iter__().
@@ -58,7 +56,7 @@ class count(func.Aggregator):
        return self.count
 
 
-@count.to_sql  # type: ignore
+@count.to_sql
 def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
    return sql.sql.func.count(val)
 
@@ -82,7 +80,7 @@ class min(func.Aggregator):
        return self.val
 
 
-@min.to_sql  # type: ignore
+@min.to_sql
 def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
    return sql.sql.func.min(val)
 
@@ -106,7 +104,7 @@ class max(func.Aggregator):
        return self.val
 
 
-@max.to_sql  # type: ignore
+@max.to_sql
 def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
    return sql.sql.func.max(val)
 
@@ -134,7 +132,7 @@ class mean(func.Aggregator):
        return self.sum / self.count
 
 
-@mean.to_sql  # type: ignore
+@mean.to_sql
 def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
    return sql.sql.func.avg(val)
 
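
Note: because each of these aggregates registers a `to_sql` translation, calls over stored columns can be evaluated directly in Postgres; a usage sketch with hypothetical table and column names:

```python
# Usage sketch (hypothetical table/column names): these aggregate calls can be
# pushed down to SQL via the to_sql translations registered above.
import pixeltable as pxt

t = pxt.get_table('sales')
t.select(pxt.functions.sum(t.amount), pxt.functions.count(t.amount)).collect()
```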
pixeltable/functions/huggingface.py CHANGED
@@ -7,21 +7,22 @@ first `pip install transformers` (or in some cases, `sentence-transformers`, as
 UDFs).
 """
 
-from typing import Callable, TypeVar, Optional, Any
+from typing import Any, Callable, Optional, TypeVar
 
 import PIL.Image
 
 import pixeltable as pxt
 import pixeltable.env as env
+import pixeltable.exceptions as excs
 from pixeltable.func import Batch
-from pixeltable.functions.util import resolve_torch_device, normalize_image_mode
+from pixeltable.functions.util import normalize_image_mode, resolve_torch_device
 from pixeltable.utils.code import local_public_names
 
 
 @pxt.udf(batch_size=32)
 def sentence_transformer(
    sentence: Batch[str], *, model_id: str, normalize_embeddings: bool = False
-) -> Batch[pxt.Array[(None,), float]]:
+) -> Batch[pxt.Array[(None,), pxt.Float]]:
    """
    Computes sentence embeddings. `model_id` should be a pretrained Sentence Transformers model, as described
    in the [Sentence Transformers Pretrained Models](https://sbert.net/docs/sentence_transformer/pretrained_models.html)
@@ -29,7 +30,7 @@ def sentence_transformer(
 
    __Requirements:__
 
-    - `pip install sentence-transformers`
+    - `pip install torch sentence-transformers`
 
    Args:
        sentence: The sentence to embed.
@@ -48,11 +49,15 @@ def sentence_transformer(
        >>> tbl['result'] = sentence_transformer(tbl.sentence, model_id='all-mpnet-base-v2')
    """
    env.Env.get().require_package('sentence_transformers')
+    device = resolve_torch_device('auto')
+    import torch
    from sentence_transformers import SentenceTransformer  # type: ignore
 
-    model = _lookup_model(model_id, SentenceTransformer)
+    # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+    model = _lookup_model(model_id, SentenceTransformer, device=device, pass_device_to_create=True)
 
-    array = model.encode(sentence, normalize_embeddings=normalize_embeddings)
+    # specifying the device, uses it for computation
+    array = model.encode(sentence, device=device, normalize_embeddings=normalize_embeddings)
    return [array[i] for i in range(array.shape[0])]
@@ -70,11 +75,15 @@ def _(model_id: str) -> pxt.ArrayType:
 @pxt.udf
 def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embeddings: bool = False) -> list:
    env.Env.get().require_package('sentence_transformers')
+    device = resolve_torch_device('auto')
+    import torch
    from sentence_transformers import SentenceTransformer
 
-    model = _lookup_model(model_id, SentenceTransformer)
+    # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+    model = _lookup_model(model_id, SentenceTransformer, device=device, pass_device_to_create=True)
 
-    array = model.encode(sentences, normalize_embeddings=normalize_embeddings)
+    # specifying the device, uses it for computation
+    array = model.encode(sentences, device=device, normalize_embeddings=normalize_embeddings)
    return [array[i].tolist() for i in range(array.shape[0])]
 
 
@@ -88,7 +97,7 @@ def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: s
 
    __Requirements:__
 
-    - `pip install sentence-transformers`
+    - `pip install torch sentence-transformers`
 
    Parameters:
        sentences1: The first sentence to be paired.
@@ -107,9 +116,13 @@ def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: s
    )
    """
    env.Env.get().require_package('sentence_transformers')
+    device = resolve_torch_device('auto')
+    import torch
    from sentence_transformers import CrossEncoder
 
-    model = _lookup_model(model_id, CrossEncoder)
+    # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+    # and uses the device for predict computation
+    model = _lookup_model(model_id, CrossEncoder, device=device, pass_device_to_create=True)
 
    array = model.predict([[s1, s2] for s1, s2 in zip(sentences1, sentences2)], convert_to_numpy=True)
    return array.tolist()
@@ -118,23 +131,27 @@ def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: s
 @pxt.udf
 def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> list:
    env.Env.get().require_package('sentence_transformers')
+    device = resolve_torch_device('auto')
+    import torch
    from sentence_transformers import CrossEncoder
 
-    model = _lookup_model(model_id, CrossEncoder)
+    # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+    # and uses the device for predict computation
+    model = _lookup_model(model_id, CrossEncoder, device=device, pass_device_to_create=True)
 
    array = model.predict([[sentence1, s2] for s2 in sentences2], convert_to_numpy=True)
    return array.tolist()
 
 
 @pxt.udf(batch_size=32)
-def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), float]]:
+def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
    """
    Computes a CLIP embedding for the specified text. `model_id` should be a reference to a pretrained
    [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
 
    __Requirements:__
 
-    - `pip install transformers`
+    - `pip install torch transformers`
 
    Args:
        text: The string to embed.
@@ -165,14 +182,14 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), fl
 
 
 @pxt.udf(batch_size=32)
-def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), float]]:
+def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
    """
    Computes a CLIP embedding for the specified image. `model_id` should be a reference to a pretrained
    [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
 
    __Requirements:__
 
-    - `pip install transformers`
+    - `pip install torch transformers`
 
    Args:
        image: The image to embed.
@@ -215,14 +232,20 @@ def _(model_id: str) -> pxt.ArrayType:
 
 
 @pxt.udf(batch_size=4)
-def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
+def detr_for_object_detection(
+    image: Batch[PIL.Image.Image],
+    *,
+    model_id: str,
+    threshold: float = 0.5,
+    revision: str = 'no_timm',
+) -> Batch[dict]:
    """
    Computes DETR object detections for the specified image. `model_id` should be a reference to a pretrained
    [DETR Model](https://huggingface.co/docs/transformers/model_doc/detr).
 
    __Requirements:__
 
-    - `pip install transformers`
+    - `pip install torch transformers`
 
    Args:
        image: The image to embed.
@@ -254,12 +277,12 @@ def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, t
    env.Env.get().require_package('transformers')
    device = resolve_torch_device('auto')
    import torch
-    from transformers import DetrImageProcessor, DetrForObjectDetection
+    from transformers import DetrForObjectDetection, DetrImageProcessor
 
    model = _lookup_model(
-        model_id, lambda x: DetrForObjectDetection.from_pretrained(x, revision='no_timm'), device=device
+        model_id, lambda x: DetrForObjectDetection.from_pretrained(x, revision=revision), device=device
    )
-    processor = _lookup_processor(model_id, lambda x: DetrImageProcessor.from_pretrained(x, revision='no_timm'))
+    processor = _lookup_processor(model_id, lambda x: DetrImageProcessor.from_pretrained(x, revision=revision))
    normalized_images = [normalize_image_mode(img) for img in image]
 
    with torch.no_grad():
@@ -286,7 +309,7 @@ def vit_for_image_classification(
    *,
    model_id: str,
    top_k: int = 5
-) -> Batch[list[dict[str, Any]]]:
+) -> Batch[dict[str, Any]]:
    """
    Computes image classifications for the specified image using a Vision Transformer (ViT) model.
    `model_id` should be a reference to a pretrained [ViT Model](https://huggingface.co/docs/transformers/en/model_doc/vit).
@@ -299,7 +322,7 @@ def vit_for_image_classification(
 
    __Requirements:__
 
-    - `pip install transformers`
+    - `pip install torch transformers`
 
    Args:
        image: The image to classify.
@@ -307,30 +330,30 @@ def vit_for_image_classification(
        top_k: The number of classes to return.
 
    Returns:
-        A list of the `top_k` highest-scoring classes for each image. Each element in the list is a dictionary
-        in the following format:
+        A dictionary containing the output of the image classification model, in the following format:
 
-        ```python
-        {
-            'p': 0.230,  # class probability
-            'class': 935,  # class ID
-            'label': 'mashed potato',  # class label
-        }
-        ```
+        ```python
+        {
+            'scores': [0.325, 0.198, 0.105],  # list of probabilities of the top-k most likely classes
+            'labels': [340, 353, 386],  # list of class IDs for the top-k most likely classes
+            'label_text': ['zebra', 'gazelle', 'African elephant, Loxodonta africana'],
+                # corresponding text names of the top-k most likely classes
+        ```
 
    Examples:
        Add a computed column that applies the model `google/vit-base-patch16-224` to an existing
-        Pixeltable column `image` of the table `tbl`:
+        Pixeltable column `image` of the table `tbl`, returning the 10 most likely classes for each image:
 
        >>> tbl['image_class'] = vit_for_image_classification(
        ...     tbl.image,
-        ...     model_id='google/vit-base-patch16-224'
+        ...     model_id='google/vit-base-patch16-224',
+        ...     top_k=10
        ... )
    """
    env.Env.get().require_package('transformers')
    device = resolve_torch_device('auto')
    import torch
-    from transformers import ViTImageProcessor, ViTForImageClassification
+    from transformers import ViTForImageClassification, ViTImageProcessor
 
    model: ViTForImageClassification = _lookup_model(model_id, ViTForImageClassification.from_pretrained, device=device)
    processor = _lookup_processor(model_id, ViTImageProcessor.from_pretrained)
@@ -344,19 +367,98 @@ def vit_for_image_classification(
    probs = torch.softmax(logits, dim=-1)
    top_k_probs, top_k_indices = torch.topk(probs, top_k, dim=-1)
 
+    # There is no official post_process method for ViT models; for consistency, we structure the output
+    # the same way as the output of the DETR model given by `post_process_object_detection`.
    return [
-        [
-            {
-                'p': top_k_probs[n, k].item(),
-                'class': top_k_indices[n, k].item(),
-                'label': model.config.id2label[top_k_indices[n, k].item()],
-            }
-            for k in range(top_k_probs.shape[1])
-        ]
+        {
+            'scores': [top_k_probs[n, k].item() for k in range(top_k_probs.shape[1])],
+            'labels': [top_k_indices[n, k].item() for k in range(top_k_probs.shape[1])],
+            'label_text': [model.config.id2label[top_k_indices[n, k].item()] for k in range(top_k_probs.shape[1])],
+        }
        for n in range(top_k_probs.shape[0])
    ]
 
 
+@pxt.udf
+def speech2text_for_conditional_generation(
+    audio: pxt.Audio,
+    *,
+    model_id: str,
+    language: Optional[str] = None,
+) -> str:
+    """
+    Transcribes or translates speech to text using a Speech2Text model. `model_id` should be a reference to a
+    pretrained [Speech2Text](https://huggingface.co/docs/transformers/en/model_doc/speech_to_text) model.
+
+    __Requirements:__
+
+    - `pip install torch torchaudio sentencepiece transformers`
+
+    Args:
+        audio: The audio clip to transcribe or translate.
+        model_id: The pretrained model to use for the transcription or translation.
+        language: If using a multilingual translation model, the language code to translate to. If not provided,
+            the model's default language will be used. If the model is not a translation model, is not a
+            multilingual model, or does not support the specified language, an error will be raised.
+
+    Returns:
+        The transcribed or translated text.
+
+    Examples:
+        Add a computed column that applies the model `facebook/s2t-small-librispeech-asr` to an existing
+        Pixeltable column `audio` of the table `tbl`:
+
+        >>> tbl['transcription'] = speech2text_for_conditional_generation(
+        ...     tbl.audio,
+        ...     model_id='facebook/s2t-small-librispeech-asr'
+        ... )
+
+        Add a computed column that applies the model `facebook/s2t-medium-mustc-multilingual-st` to an existing
+        Pixeltable column `audio` of the table `tbl`, translating the audio to French:
+
+        >>> tbl['translation'] = speech2text_for_conditional_generation(
+        ...     tbl.audio,
+        ...     model_id='facebook/s2t-medium-mustc-multilingual-st',
+        ...     language='fr'
+        ... )
+    """
+    env.Env.get().require_package('transformers')
+    env.Env.get().require_package('torchaudio')
+    env.Env.get().require_package('sentencepiece')
+    device = resolve_torch_device('auto', allow_mps=False)  # Doesn't seem to work on 'mps'; use 'cpu' instead
+    import librosa
+    import torch
+    from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
+
+    # facebook/s2t-small-librispeech-asr
+    # facebook/s2t-small-mustc-en-fr-st
+    model = _lookup_model(model_id, Speech2TextForConditionalGeneration.from_pretrained, device=device)
+    processor = _lookup_processor(model_id, Speech2TextProcessor.from_pretrained)
+    assert isinstance(processor, Speech2TextProcessor)
+
+    if language is not None and language not in processor.tokenizer.lang_code_to_id:
+        raise excs.Error(
+            f"Language code '{language}' is not supported by the model '{model_id}'. "
+            f"Supported languages are: {list(processor.tokenizer.lang_code_to_id.keys())}")
+
+    forced_bos_token_id: Optional[int] = None if language is None else processor.tokenizer.lang_code_to_id[language]
+
+    # Get the model's sampling rate. Default to 16 kHz (the standard) if not in config
+    model_sampling_rate = getattr(model.config, 'sampling_rate', 16_000)
+    waveform, sampling_rate = librosa.load(audio, sr=model_sampling_rate, mono=True)
+
+    with torch.no_grad():
+        inputs = processor(
+            waveform,
+            sampling_rate=sampling_rate,
+            return_tensors='pt'
+        )
+        generated_ids = model.generate(**inputs.to(device), forced_bos_token_id=forced_bos_token_id).to('cpu')
+
+    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
+    return transcription
+
+
 @pxt.udf
 def detr_to_coco(image: PIL.Image.Image, detr_info: dict[str, Any]) -> dict[str, Any]:
    """
@@ -386,14 +488,22 @@ def detr_to_coco(image: PIL.Image.Image, detr_info: dict[str, Any]) -> dict[str,
 T = TypeVar('T')
 
 
-def _lookup_model(model_id: str, create: Callable[[str], T], device: Optional[str] = None) -> T:
+def _lookup_model(
+    model_id: str,
+    create: Callable[..., T],
+    device: Optional[str] = None,
+    pass_device_to_create: bool = False
+) -> T:
    from torch import nn
 
    key = (model_id, create, device)  # For safety, include the `create` callable in the cache key
    if key not in _model_cache:
-        model = create(model_id)
+        if pass_device_to_create:
+            model = create(model_id, device=device)
+        else:
+            model = create(model_id)
        if isinstance(model, nn.Module):
-            if device is not None:
+            if not pass_device_to_create and device is not None:
                model.to(device)
            model.eval()
        _model_cache[key] = model
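
Note: a sketch of the two construction paths in `_lookup_model`, as called from within pixeltable/functions/huggingface.py; the model IDs are taken from the docstrings above, and `device='cpu'` is just for illustration:

```python
# SentenceTransformer's constructor accepts device= directly, so
# pass_device_to_create=True; from_pretrained-style factories are created
# first and then moved with .to(device) inside _lookup_model.
from sentence_transformers import SentenceTransformer
from transformers import ViTForImageClassification

st = _lookup_model('all-mpnet-base-v2', SentenceTransformer, device='cpu', pass_device_to_create=True)
vit = _lookup_model('google/vit-base-patch16-224', ViTForImageClassification.from_pretrained, device='cpu')
```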