PyPI - pixeltable - Versions diffs - 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl - Mend

pixeltable 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (29)

pixeltable/__init__.py +1 -1
pixeltable/catalog/insertable_table.py +125 -28
pixeltable/catalog/table.py +10 -1
pixeltable/config.py +1 -0
pixeltable/env.py +57 -4
pixeltable/functions/__init__.py +2 -0
pixeltable/functions/audio.py +2 -1
pixeltable/functions/gemini.py +8 -0
pixeltable/functions/video.py +534 -81
pixeltable/functions/whisper.py +8 -0
pixeltable/functions/whisperx.py +177 -0
pixeltable/{ext/functions → functions}/yolox.py +0 -4
pixeltable/globals.py +3 -1
pixeltable/iterators/video.py +138 -0
pixeltable/metadata/__init__.py +3 -1
pixeltable/mypy/__init__.py +3 -0
pixeltable/mypy/mypy_plugin.py +123 -0
pixeltable/type_system.py +20 -4
pixeltable/utils/av.py +111 -0
pixeltable/utils/code.py +2 -1
pixeltable/utils/pydantic.py +60 -0
{pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/METADATA +1 -1
{pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/RECORD +26 -24
pixeltable/ext/__init__.py +0 -17
pixeltable/ext/functions/__init__.py +0 -11
pixeltable/ext/functions/whisperx.py +0 -77
{pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/WHEEL +0 -0
{pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/entry_points.txt +0 -0
{pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/licenses/LICENSE +0 -0

pixeltable/functions/whisper.py CHANGED Viewed

@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Optional, Sequence
 import pixeltable as pxt
 from pixeltable.env import Env
+from pixeltable.utils.code import local_public_names
 if TYPE_CHECKING:
     from whisper import Whisper  # type: ignore[import-untyped]
@@ -90,3 +91,10 @@ def _lookup_model(model_id: str, device: str) -> 'Whisper':
 _model_cache: dict[tuple[str, str], 'Whisper'] = {}
+__all__ = local_public_names(__name__)
+def __dir__() -> list[str]:
+    return __all__

pixeltable/functions/whisperx.py ADDED Viewed

@@ -0,0 +1,177 @@
+from typing import TYPE_CHECKING, Any, Optional
+import numpy as np
+import pixeltable as pxt
+from pixeltable.config import Config
+from pixeltable.functions.util import resolve_torch_device
+from pixeltable.utils.code import local_public_names
+if TYPE_CHECKING:
+    from transformers import Wav2Vec2Model
+    from whisperx.asr import FasterWhisperPipeline  # type: ignore[import-untyped]
+    from whisperx.diarize import DiarizationPipeline  # type: ignore[import-untyped]
+@pxt.udf
+def transcribe(
+    audio: pxt.Audio,
+    *,
+    model: str,
+    diarize: bool = False,
+    compute_type: Optional[str] = None,
+    language: Optional[str] = None,
+    task: Optional[str] = None,
+    chunk_size: Optional[int] = None,
+    alignment_model_name: Optional[str] = None,
+    interpolate_method: Optional[str] = None,
+    return_char_alignments: Optional[bool] = None,
+    diarization_model_name: Optional[str] = None,
+    num_speakers: Optional[int] = None,
+    min_speakers: Optional[int] = None,
+    max_speakers: Optional[int] = None,
+) -> dict:
+    """
+    Transcribe an audio file using WhisperX.
+    This UDF runs a transcription model _locally_ using the WhisperX library,
+    equivalent to the WhisperX `transcribe` function, as described in the
+    [WhisperX library documentation](https://github.com/m-bain/whisperX).
+    If `diarize=True`, then speaker diarization will also be performed. Several of the UDF parameters are only valid if
+    `diarize=True`, as documented in the parameters list below.
+    __Requirements:__
+    - `pip install whisperx`
+    Args:
+        audio: The audio file to transcribe.
+        model: The name of the model to use for transcription.
+        diarize: Whether to perform speaker diarization.
+        compute_type: The compute type to use for the model (e.g., `'int8'`, `'float16'`). If `None`,
+            defaults to `'float16'` on CUDA devices and `'int8'` otherwise.
+        language: The language code for the transcription (e.g., `'en'` for English).
+        task: The task to perform (e.g., `'transcribe'` or `'translate'`). Defaults to `'transcribe'`.
+        chunk_size: The size of the audio chunks to process, in seconds. Defaults to `30`.
+        alignment_model_name: The name of the alignment model to use. If `None`, uses the default model for the given
+            language. Only valid if `diarize=True`.
+        interpolate_method: The method to use for interpolation of the alignment results. If not specified, uses the
+            WhisperX default (`'nearest'`). Only valid if `diarize=True`.
+        return_char_alignments: Whether to return character-level alignments. Defaults to `False`.
+            Only valid if `diarize=True`.
+        diarization_model_name: The name of the diarization model to use. Defaults to
+            `pyannote/speaker-diarization-3.1`. Only valid if `diarize=True`.
+        num_speakers: The number of speakers to expect in the audio. By default, the model with try to detect the
+            number of speakers. Only valid if `diarize=True`.
+        min_speakers: If specified, the minimum number of speakers to expect in the audio.
+            Only valid if `diarize=True`.
+        max_speakers: If specified, the maximum number of speakers to expect in the audio.
+            Only valid if `diarize=True`.
+    Returns:
+        A dictionary containing the audio transcription, diarization (if enabled), and various other metadata.
+    Examples:
+        Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
+        of the table `tbl`:
+        >>> tbl.add_computed_column(result=transcribe(tbl.audio, model='tiny.en'))
+        Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
+        of the table `tbl`, with speaker diarization enabled, expecting at least 2 speakers:
+        >>> tbl.add_computed_column(
+        ...     result=transcribe(
+        ...         tbl.audio, model='tiny.en', diarize=True, min_speakers=2
+        ...     )
+        ... )
+    """
+    import whisperx  # type: ignore[import-untyped]
+    if not diarize:
+        args = locals()
+        for param in (
+            'alignment_model_name',
+            'interpolate_method',
+            'return_char_alignments',
+            'diarization_model_name',
+            'num_speakers',
+            'min_speakers',
+            'max_speakers',
+        ):
+            if args[param] is not None:
+                raise pxt.Error(f'`{param}` can only be set if `diarize=True`')
+    device = resolve_torch_device('auto', allow_mps=False)
+    compute_type = compute_type or ('float16' if device == 'cuda' else 'int8')
+    transcription_model = _lookup_transcription_model(model, device, compute_type)
+    audio_array: np.ndarray = whisperx.load_audio(audio)
+    kwargs: dict[str, Any] = {'language': language, 'task': task}
+    if chunk_size is not None:
+        kwargs['chunk_size'] = chunk_size
+    result: dict[str, Any] = transcription_model.transcribe(audio_array, batch_size=16, **kwargs)
+    if diarize:
+        # Alignment
+        alignment_model, metadata = _lookup_alignment_model(result['language'], device, alignment_model_name)
+        kwargs = {}
+        if interpolate_method is not None:
+            kwargs['interpolate_method'] = interpolate_method
+        if return_char_alignments is not None:
+            kwargs['return_char_alignments'] = return_char_alignments
+        result = whisperx.align(result['segments'], alignment_model, metadata, audio_array, device, **kwargs)
+        # Diarization
+        diarization_model = _lookup_diarization_model(device, diarization_model_name)
+        diarization_segments = diarization_model(
+            audio_array, num_speakers=num_speakers, min_speakers=min_speakers, max_speakers=max_speakers
+        )
+        result = whisperx.assign_word_speakers(diarization_segments, result)
+    return result
+def _lookup_transcription_model(model: str, device: str, compute_type: str) -> 'FasterWhisperPipeline':
+    import whisperx
+    key = (model, device, compute_type)
+    if key not in _model_cache:
+        transcription_model = whisperx.load_model(model, device, compute_type=compute_type)
+        _model_cache[key] = transcription_model
+    return _model_cache[key]
+def _lookup_alignment_model(language_code: str, device: str, model_name: Optional[str]) -> tuple['Wav2Vec2Model', dict]:
+    import whisperx
+    key = (language_code, device, model_name)
+    if key not in _alignment_model_cache:
+        model, metadata = whisperx.load_align_model(language_code=language_code, device=device, model_name=model_name)
+        _alignment_model_cache[key] = (model, metadata)
+    return _alignment_model_cache[key]
+def _lookup_diarization_model(device: str, model_name: Optional[str]) -> 'DiarizationPipeline':
+    from whisperx.diarize import DiarizationPipeline
+    key = (device, model_name)
+    if key not in _diarization_model_cache:
+        auth_token = Config.get().get_string_value('auth_token', section='hf')
+        kwargs: dict[str, Any] = {'device': device, 'use_auth_token': auth_token}
+        if model_name is not None:
+            kwargs['model_name'] = model_name
+        _diarization_model_cache[key] = DiarizationPipeline(**kwargs)
+    return _diarization_model_cache[key]
+_model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
+_alignment_model_cache: dict[tuple[str, str, Optional[str]], tuple['Wav2Vec2Model', dict]] = {}
+_diarization_model_cache: dict[tuple[str, Optional[str]], 'DiarizationPipeline'] = {}
+__all__ = local_public_names(__name__)
+def __dir__() -> list[str]:
+    return __all__

pixeltable/{ext/functions → functions}/yolox.py RENAMED Viewed

@@ -20,8 +20,6 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
     Computes YOLOX object detections for the specified image. `model_id` should reference one of the models
     defined in the [YOLOX documentation](https://github.com/Megvii-BaseDetection/YOLOX).
-    YOLOX is part of the `pixeltable.ext` package: long-term support in Pixeltable is not guaranteed.
     __Requirements__:
     - `pip install pixeltable-yolox`
@@ -55,8 +53,6 @@ def yolo_to_coco(detections: dict) -> list:
     """
     Converts the output of a YOLOX object detection model to COCO format.
-    YOLOX is part of the `pixeltable.ext` package: long-term support in Pixeltable is not guaranteed.
     Args:
         detections: The output of a YOLOX object detection model, as returned by `yolox`.

pixeltable/globals.py CHANGED Viewed

@@ -3,9 +3,10 @@ from __future__ import annotations
 import logging
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, NamedTuple, Optional, Union
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, NamedTuple, Optional, Sequence, Union
 import pandas as pd
+import pydantic
 from pandas.io.formats.style import Styler
 from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share, type_system as ts
@@ -25,6 +26,7 @@ if TYPE_CHECKING:
         Path,  # OS paths, filenames, URLs
         Iterator[dict[str, Any]],  # iterator producing dictionaries of values
         RowData,  # list of dictionaries
+        Sequence[pydantic.BaseModel],  # list of Pydantic models
         DataFrame,  # Pixeltable DataFrame
         pd.DataFrame,  # pandas DataFrame
         datasets.Dataset,

pixeltable/iterators/video.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import logging
 import math
+import shutil
+import subprocess
 from fractions import Fraction
 from pathlib import Path
 from typing import Any, Optional
@@ -8,8 +10,11 @@ import av
 import pandas as pd
 import PIL.Image
+import pixeltable as pxt
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
+import pixeltable.utils.av as av_utils
+from pixeltable.utils.media_store import TempStore
 from .base import ComponentIterator
@@ -224,3 +229,136 @@ class FrameIterator(ComponentIterator):
         # then the iterator will step forward to the desired frame on the subsequent call to next().
         self.container.seek(seek_pos, backward=True, stream=self.container.streams.video[0])
         self.next_pos = pos
+class VideoSplitter(ComponentIterator):
+    """
+    Iterator over segments of a video file, which is split into fixed-size segments of length `segment_duration`
+    seconds.
+    Args:
+        segment_duration: Video segment duration in seconds
+        overlap: Overlap between consecutive segments in seconds.
+        min_segment_duration: Drop the last segment if it is smaller than min_segment_duration
+    """
+    # Input parameters
+    video_path: Path
+    segment_duration: float
+    overlap: float
+    min_segment_duration: float
+    # Video metadata
+    video_duration: float
+    video_time_base: Fraction
+    video_start_time: int
+    # position tracking
+    next_segment_start: float
+    next_segment_start_pts: int
+    def __init__(self, video: str, segment_duration: float, *, overlap: float = 0.0, min_segment_duration: float = 0.0):
+        assert segment_duration > 0.0
+        assert segment_duration >= min_segment_duration
+        assert overlap < segment_duration
+        video_path = Path(video)
+        assert video_path.exists() and video_path.is_file()
+        if not shutil.which('ffmpeg'):
+            raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use VideoSplitter.')
+        self.video_path = video_path
+        self.segment_duration = segment_duration
+        self.overlap = overlap
+        self.min_segment_duration = min_segment_duration
+        with av.open(str(video_path)) as container:
+            video_stream = container.streams.video[0]
+            self.video_time_base = video_stream.time_base
+            self.video_start_time = video_stream.start_time or 0
+        self.next_segment_start = float(self.video_start_time * self.video_time_base)
+        self.next_segment_start_pts = self.video_start_time
+    @classmethod
+    def input_schema(cls) -> dict[str, ts.ColumnType]:
+        return {
+            'video': ts.VideoType(nullable=False),
+            'segment_duration': ts.FloatType(nullable=False),
+            'overlap': ts.FloatType(nullable=True),
+            'min_segment_duration': ts.FloatType(nullable=True),
+        }
+    @classmethod
+    def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
+        param_names = ['segment_duration', 'overlap', 'min_segment_duration']
+        params = dict(zip(param_names, args))
+        params.update(kwargs)
+        segment_duration = params['segment_duration']
+        min_segment_duration = params.get('min_segment_duration', 0.0)
+        overlap = params.get('overlap', 0.0)
+        if segment_duration <= 0.0:
+            raise excs.Error('segment_duration must be a positive number')
+        if segment_duration < min_segment_duration:
+            raise excs.Error('segment_duration must be at least min_segment_duration')
+        if overlap >= segment_duration:
+            raise excs.Error('overlap must be less than segment_duration')
+        return {
+            'segment_start': ts.FloatType(nullable=False),
+            'segment_start_pts': ts.IntType(nullable=False),
+            'segment_end': ts.FloatType(nullable=False),
+            'segment_end_pts': ts.IntType(nullable=False),
+            'video_segment': ts.VideoType(nullable=False),
+        }, []
+    def __next__(self) -> dict[str, Any]:
+        segment_path = str(TempStore.create_path(extension='.mp4'))
+        try:
+            cmd = av_utils.ffmpeg_clip_cmd(
+                str(self.video_path), segment_path, self.next_segment_start, self.segment_duration
+            )
+            _ = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            # use the actual duration
+            segment_duration = av_utils.get_video_duration(segment_path)
+            if segment_duration - self.overlap == 0.0:
+                # we're done
+                Path(segment_path).unlink()
+                raise StopIteration
+            if segment_duration < self.min_segment_duration:
+                Path(segment_path).unlink()
+                raise StopIteration
+            segment_end = self.next_segment_start + segment_duration
+            segment_end_pts = self.next_segment_start_pts + round(segment_duration / self.video_time_base)
+            result = {
+                'segment_start': self.next_segment_start,
+                'segment_start_pts': self.next_segment_start_pts,
+                'segment_end': segment_end,
+                'segment_end_pts': segment_end_pts,
+                'video_segment': segment_path,
+            }
+            self.next_segment_start = segment_end - self.overlap
+            self.next_segment_start_pts = segment_end_pts - round(self.overlap / self.video_time_base)
+            return result
+        except subprocess.CalledProcessError as e:
+            if Path(segment_path).exists():
+                Path(segment_path).unlink()
+            error_msg = f'ffmpeg failed with return code {e.returncode}'
+            if e.stderr:
+                error_msg += f': {e.stderr.strip()}'
+            raise pxt.Error(error_msg) from e
+    def close(self) -> None:
+        pass
+    def set_pos(self, pos: int) -> None:
+        pass

pixeltable/metadata/__init__.py CHANGED Viewed

@@ -25,6 +25,7 @@ def create_system_info(engine: sql.engine.Engine) -> None:
     """Create the system metadata record"""
     system_md = SystemInfoMd(schema_version=VERSION)
     record = SystemInfo(md=dataclasses.asdict(system_md))
+    _logger.debug(f'Creating pixeltable system info record {record}')
     with orm.Session(engine, future=True) as session:
         # Write system metadata only once for idempotency
         if session.query(SystemInfo).count() == 0:
@@ -54,7 +55,8 @@ for _, modname, _ in pkgutil.iter_modules([os.path.dirname(__file__) + '/convert
 def upgrade_md(engine: sql.engine.Engine) -> None:
     """Upgrade the metadata schema to the current version"""
     with orm.Session(engine) as session:
-        system_info = session.query(SystemInfo).one().md
+        # Get exclusive lock on SystemInfo row
+        system_info = session.query(SystemInfo).with_for_update().one().md
         md_version = system_info['schema_version']
         assert isinstance(md_version, int)
         _logger.info(f'Current database version: {md_version}, installed version: {VERSION}')

pixeltable/mypy/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .mypy_plugin import plugin
+__all__ = ['plugin']

pixeltable/mypy/mypy_plugin.py ADDED Viewed

@@ -0,0 +1,123 @@
+from typing import Callable, ClassVar, Optional
+from mypy import nodes
+from mypy.plugin import AnalyzeTypeContext, ClassDefContext, FunctionContext, MethodSigContext, Plugin
+from mypy.plugins.common import add_attribute_to_class, add_method_to_class
+from mypy.types import AnyType, FunctionLike, Instance, NoneType, Type, TypeOfAny
+import pixeltable as pxt
+from pixeltable import exprs
+class PxtPlugin(Plugin):
+    __UDA_FULLNAME = f'{pxt.uda.__module__}.{pxt.uda.__name__}'
+    __ARRAY_GETITEM_FULLNAME = f'{pxt.Array.__module__}.{pxt.Array.__name__}.__class_getitem__'
+    __ADD_COLUMN_FULLNAME = f'{pxt.Table.__module__}.{pxt.Table.__name__}.{pxt.Table.add_column.__name__}'
+    __ADD_COMPUTED_COLUMN_FULLNAME = (
+        f'{pxt.Table.__module__}.{pxt.Table.__name__}.{pxt.Table.add_computed_column.__name__}'
+    )
+    __TYPE_MAP: ClassVar[dict] = {
+        pxt.Json: 'typing.Any',
+        pxt.Array: 'numpy.ndarray',
+        pxt.Image: 'PIL.Image.Image',
+        pxt.Video: 'builtins.str',
+        pxt.Audio: 'builtins.str',
+        pxt.Document: 'builtins.str',
+    }
+    __FULLNAME_MAP: ClassVar[dict] = {f'{k.__module__}.{k.__name__}': v for k, v in __TYPE_MAP.items()}
+    def get_function_hook(self, fullname: str) -> Optional[Callable[[FunctionContext], Type]]:
+        return adjust_uda_type
+    def get_type_analyze_hook(self, fullname: str) -> Optional[Callable[[AnalyzeTypeContext], Type]]:
+        if fullname in self.__FULLNAME_MAP:
+            subst_name = self.__FULLNAME_MAP[fullname]
+            return lambda ctx: adjust_pxt_type(ctx, subst_name)
+        return None
+    def get_method_signature_hook(self, fullname: str) -> Optional[Callable[[MethodSigContext], FunctionLike]]:
+        if fullname in (self.__ADD_COLUMN_FULLNAME, self.__ADD_COMPUTED_COLUMN_FULLNAME):
+            return adjust_kwargs
+        return None
+    def get_class_decorator_hook_2(self, fullname: str) -> Optional[Callable[[ClassDefContext], bool]]:
+        if fullname == self.__UDA_FULLNAME:
+            return adjust_uda_methods
+        return None
+def plugin(version: str) -> type:
+    return PxtPlugin
+_AGGREGATOR_FULLNAME = f'{pxt.Aggregator.__module__}.{pxt.Aggregator.__name__}'
+_FN_CALL_FULLNAME = f'{exprs.Expr.__module__}.{exprs.Expr.__name__}'
+def adjust_uda_type(ctx: FunctionContext) -> Type:
+    """
+    Mypy doesn't understand that a class with a @uda decorator isn't actually a class, so it assumes
+    that sum(expr), for example, actually returns an instance of sum. We correct this by changing the
+    return type of any subclass of `Aggregator` to `FunctionCall`.
+    """
+    ret_type = ctx.default_return_type
+    if isinstance(ret_type, Instance) and (
+        ret_type.type.fullname == _AGGREGATOR_FULLNAME
+        or any(base.type.fullname == _AGGREGATOR_FULLNAME for base in ret_type.type.bases)
+    ):
+        ret_type = AnyType(TypeOfAny.special_form)
+    return ret_type
+def adjust_pxt_type(ctx: AnalyzeTypeContext, subst_name: str) -> Type:
+    """
+    Replaces the special Pixeltable classes (such as pxt.Array) with their standard equivalents (such as np.ndarray).
+    """
+    if subst_name == 'typing.Any':
+        return AnyType(TypeOfAny.special_form)
+    return ctx.api.named_type(subst_name, [])
+def adjust_kwargs(ctx: MethodSigContext) -> FunctionLike:
+    """
+    Mypy has a "feature" where it will spit out multiple warnings if a method with signature
+    ```
+    def my_func(*, arg1: int, arg2: str, **kwargs: Expr)
+    ```
+    (for example) is called with bare kwargs:
+    ```
+    my_func(my_kwarg=value)
+    ```
+    This is a disaster for type-checking of add_column and add_computed_column. Here we adjust the signature so
+    that mypy thinks it is simply
+    ```
+    def my_func(**kwargs: Any)
+    ```
+    thereby avoiding any type-checking errors. For details, see: <https://github.com/python/mypy/issues/18481>
+    """
+    sig = ctx.default_signature
+    new_arg_names = sig.arg_names[-1:]
+    new_arg_types = [AnyType(TypeOfAny.special_form)]
+    new_arg_kinds = sig.arg_kinds[-1:]
+    return sig.copy_modified(arg_names=new_arg_names, arg_types=new_arg_types, arg_kinds=new_arg_kinds)
+def adjust_uda_methods(ctx: ClassDefContext) -> bool:
+    """
+    Mypy does not handle the `@pxt.uda` aggregator well; it continues to treat the decorated class as a class,
+    even though it has been replaced by an `AggregateFunction`. Here we add static methods to the class that
+    imitate various (instance) methods of `AggregateFunction` so that they can be properly type-checked.
+    """
+    list_type = ctx.api.named_type('builtins.list', [AnyType(TypeOfAny.special_form)])
+    fn_arg = nodes.Argument(nodes.Var('fn'), AnyType(TypeOfAny.special_form), None, nodes.ARG_POS)
+    args_arg = nodes.Argument(nodes.Var('args'), AnyType(TypeOfAny.special_form), None, nodes.ARG_STAR)
+    kwargs_arg = nodes.Argument(nodes.Var('kwargs'), AnyType(TypeOfAny.special_form), None, nodes.ARG_STAR2)
+    add_method_to_class(ctx.api, ctx.cls, '__init__', args=[args_arg, kwargs_arg], return_type=NoneType())
+    add_method_to_class(
+        ctx.api, ctx.cls, 'to_sql', args=[fn_arg], return_type=AnyType(TypeOfAny.special_form), is_staticmethod=True
+    )
+    add_method_to_class(
+        ctx.api, ctx.cls, 'overload', args=[fn_arg], return_type=AnyType(TypeOfAny.special_form), is_staticmethod=True
+    )
+    add_attribute_to_class(ctx.api, ctx.cls, 'signatures', typ=list_type, is_classvar=True)
+    return True

pixeltable/type_system.py CHANGED Viewed

@@ -9,8 +9,11 @@ import types
 import typing
 import urllib.parse
 import urllib.request
+from pathlib import Path
 from typing import Any, ClassVar, Iterable, Literal, Mapping, Optional, Sequence, Union
+from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
 import av
 import jsonschema
 import jsonschema.protocols
@@ -24,8 +27,6 @@ from typing_extensions import _AnnotatedAlias
 import pixeltable.exceptions as excs
 from pixeltable.utils import parse_local_file_path
-from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
 class ColumnType:
     @enum.unique
@@ -292,7 +293,11 @@ class ColumnType:
     @classmethod
     def from_python_type(
-        cls, t: type | _GenericAlias, nullable_default: bool = False, allow_builtin_types: bool = True
+        cls,
+        t: type | _GenericAlias,
+        nullable_default: bool = False,
+        allow_builtin_types: bool = True,
+        infer_pydantic_json: bool = False,
     ) -> Optional[ColumnType]:
         """
         Convert a Python type into a Pixeltable `ColumnType` instance.
@@ -305,6 +310,8 @@ class ColumnType:
                 allowed (as in UDF definitions). If False, then only Pixeltable types such as `pxt.String`,
                 `pxt.Int`, etc., will be allowed (as in schema definitions). `Optional` and `Required`
                 designations will be allowed regardless.
+            infer_pydantic_json: If True, accepts an extended set of built-ins (eg, Enum, Path) and returns the type to
+                which pydantic.BaseModel.model_dump(mode='json') serializes it.
         """
         origin = typing.get_origin(t)
         type_args = typing.get_args(t)
@@ -314,7 +321,9 @@ class ColumnType:
                 # `t` is a type of the form Optional[T] (equivalently, T | None or None | T).
                 # We treat it as the underlying type but with nullable=True.
                 underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
-                underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
+                underlying = cls.from_python_type(
+                    underlying_py_type, allow_builtin_types=allow_builtin_types, infer_pydantic_json=infer_pydantic_json
+                )
                 if underlying is not None:
                     return underlying.copy(nullable=True)
         elif origin is Required:
@@ -341,6 +350,13 @@ class ColumnType:
                     if literal_type is None:
                         return None
                     return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
+                if infer_pydantic_json and isinstance(t, type) and issubclass(t, enum.Enum):
+                    literal_type = cls.infer_common_literal_type(member.value for member in t)
+                    if literal_type is None:
+                        return None
+                    return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
+                if infer_pydantic_json and t is Path:
+                    return StringType(nullable=nullable_default)
                 if t is str:
                     return StringType(nullable=nullable_default)
                 if t is int:

pixeltable 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl