PyPI - pixeltable - Versions diffs - 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl - Mend

pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (56) hide show

pixeltable/catalog/column.py +25 -48
pixeltable/catalog/insertable_table.py +7 -4
pixeltable/catalog/table.py +163 -57
pixeltable/catalog/table_version.py +416 -140
pixeltable/catalog/table_version_path.py +2 -2
pixeltable/client.py +0 -4
pixeltable/dataframe.py +65 -21
pixeltable/env.py +16 -1
pixeltable/exec/cache_prefetch_node.py +1 -1
pixeltable/exec/in_memory_data_node.py +11 -7
pixeltable/exprs/comparison.py +3 -3
pixeltable/exprs/data_row.py +5 -1
pixeltable/exprs/literal.py +16 -4
pixeltable/exprs/row_builder.py +8 -40
pixeltable/ext/__init__.py +5 -0
pixeltable/ext/functions/yolox.py +92 -0
pixeltable/func/aggregate_function.py +15 -15
pixeltable/func/expr_template_function.py +9 -1
pixeltable/func/globals.py +24 -14
pixeltable/func/signature.py +18 -12
pixeltable/func/udf.py +7 -2
pixeltable/functions/__init__.py +8 -8
pixeltable/functions/eval.py +7 -8
pixeltable/functions/huggingface.py +47 -19
pixeltable/functions/openai.py +2 -2
pixeltable/functions/util.py +11 -0
pixeltable/index/__init__.py +2 -0
pixeltable/index/base.py +49 -0
pixeltable/index/embedding_index.py +95 -0
pixeltable/metadata/schema.py +45 -22
pixeltable/plan.py +15 -34
pixeltable/store.py +38 -41
pixeltable/tests/conftest.py +5 -11
pixeltable/tests/ext/test_yolox.py +21 -0
pixeltable/tests/functions/test_fireworks.py +1 -0
pixeltable/tests/functions/test_huggingface.py +2 -2
pixeltable/tests/functions/test_openai.py +15 -5
pixeltable/tests/functions/test_together.py +1 -0
pixeltable/tests/test_component_view.py +14 -5
pixeltable/tests/test_dataframe.py +19 -18
pixeltable/tests/test_exprs.py +99 -102
pixeltable/tests/test_function.py +51 -43
pixeltable/tests/test_index.py +138 -0
pixeltable/tests/test_migration.py +2 -1
pixeltable/tests/test_snapshot.py +24 -1
pixeltable/tests/test_table.py +101 -25
pixeltable/tests/test_types.py +30 -0
pixeltable/tests/test_video.py +16 -16
pixeltable/tests/test_view.py +5 -0
pixeltable/tests/utils.py +43 -9
pixeltable/tool/create_test_db_dump.py +16 -0
pixeltable/type_system.py +37 -45
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/METADATA +5 -4
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/RECORD +56 -49
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0

pixeltable/catalog/table_version_path.py CHANGED Viewed

@@ -101,8 +101,8 @@ class TableVersionPath:
         return DataFrame(self).__getitem__(index)
     def columns(self) -> List[Column]:
-        """Return all columns visible in this tbl version path, including columns from bases"""
-        result = self.tbl_version.cols.copy()
+        """Return all user columns visible in this tbl version path, including columns from bases"""
+        result = list(self.tbl_version.cols_by_name.values())
         if self.base is not None:
             base_cols = self.base.columns()
             # we only include base columns that don't conflict with one of our column names

pixeltable/client.py CHANGED Viewed

@@ -132,10 +132,6 @@ class Client:
             Create a table with an int and a string column:
             >>> table = cl.create_table('my_table', schema={'col1': IntType(), 'col2': StringType()})
-            Create a table with a single indexed image column:
-            >>> table = cl.create_table('my_table', schema={'col1': {'type': ImageType(), 'indexed': True}})
         """
         path = catalog.Path(path_str)
         self.catalog.paths.check_is_valid(path, expected=None)

pixeltable/dataframe.py CHANGED Viewed

@@ -11,6 +11,8 @@ import traceback
 from pathlib import Path
 from typing import List, Optional, Any, Dict, Generator, Tuple, Set
+import PIL.Image
+import cv2
 import pandas as pd
 import pandas.io.formats.style
 import sqlalchemy as sql
@@ -31,15 +33,6 @@ __all__ = [
 _logger = logging.getLogger('pixeltable')
-def _format_img(img: object) -> str:
-    """
-    Create <img> tag for Image object.
-    """
-    assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
-    with io.BytesIO() as buffer:
-        img.save(buffer, 'jpeg')
-        img_base64 = base64.b64encode(buffer.getvalue()).decode()
-        return f'<div style="width:200px;"><img src="data:image/jpeg;base64,{img_base64}" width="200" /></div>'
 def _create_source_tag(file_path: str) -> str:
     abs_path = Path(file_path)
@@ -50,21 +43,17 @@ def _create_source_tag(file_path: str) -> str:
     mime_attr = f'type="{mime}"' if mime is not None else ''
     return f'<source src="{src_url}" {mime_attr} />'
-def _format_video(file_path: str) -> str:
-    return f'<video controls>{_create_source_tag(file_path)}</video>'
-def _format_audio(file_path: str) -> str:
-    return f'<audio controls>{_create_source_tag(file_path)}</audio>'
 class DataFrameResultSet:
     def __init__(self, rows: List[List[Any]], col_names: List[str], col_types: List[ColumnType]):
         self._rows = rows
         self._col_names = col_names
         self._col_types = col_types
         self._formatters = {
-            ts.ImageType: _format_img,
-            ts.VideoType: _format_video,
-            ts.AudioType: _format_audio,
+            ts.ImageType: self._format_img,
+            ts.VideoType: self._format_video,
+            ts.AudioType: self._format_audio,
         }
     def __len__(self) -> int:
@@ -85,9 +74,7 @@ class DataFrameResultSet:
             for col_name, col_type in zip(self._col_names, self._col_types)
             if col_type.__class__ in self._formatters
         }
-        # TODO: why does mypy complain about formatters having an incorrect type?
-        return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)  # type: ignore[arg-type]
+        return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)
     def __str__(self) -> str:
         return self.to_pandas().to_string()
@@ -102,6 +89,64 @@ class DataFrameResultSet:
     def _row_to_dict(self, row_idx: int) -> Dict[str, Any]:
         return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
+    # Formatters
+    def _format_img(self, img: Image.Image) -> str:
+        """
+        Create <img> tag for Image object.
+        """
+        assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
+        # Try to make it look decent in a variety of display scenarios
+        if len(self._rows) > 1:
+            width = 240  # Multiple rows: display small images
+        elif len(self._col_names) > 1:
+            width = 480  # Multiple columns: display medium images
+        else:
+            width = 640  # A single image: larger display
+        with io.BytesIO() as buffer:
+            img.save(buffer, 'jpeg')
+            img_base64 = base64.b64encode(buffer.getvalue()).decode()
+            return f'''
+            <div style="width:{width}px;">
+                <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
+            </div>
+            '''
+    def _format_video(self, file_path: str) -> str:
+        thumb_tag = ""
+        # Attempt to extract the first frame of the video to use as a thumbnail,
+        # so that the notebook can be exported as HTML and viewed in contexts where
+        # the video itself is not accessible.
+        # TODO(aaron-siegel): If the video is backed by a concrete external URL,
+        # should we link to that instead?
+        video_reader = cv2.VideoCapture(str(file_path))
+        if video_reader.isOpened():
+            status, img_array = video_reader.read()
+            if status:
+                img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
+                thumb = PIL.Image.fromarray(img_array)
+                with io.BytesIO() as buffer:
+                    thumb.save(buffer, 'jpeg')
+                    thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
+                    thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
+            video_reader.release()
+        if len(self._rows) > 1:
+            width = 320
+        elif len(self._col_names) > 1:
+            width = 480
+        else:
+            width = 800
+        return f'''
+        <div style="width:{width}px;">
+            <video controls width="{width}" {thumb_tag}>
+                {_create_source_tag(file_path)}
+            </video>
+        </div>
+        '''
+    def _format_audio(self, file_path: str) -> str:
+        return f'<audio controls>{_create_source_tag(file_path)}</audio>'
     def __getitem__(self, index: Any) -> Any:
         if isinstance(index, str):
             if index not in self._col_names:
@@ -173,7 +218,6 @@ class AnalysisInfo:
             self.filter.release()
 class DataFrame:
     def __init__(
             self, tbl: catalog.TableVersionPath,

pixeltable/env.py CHANGED Viewed

@@ -10,8 +10,8 @@ import os
 import socketserver
 import sys
 import threading
-import typing
 import uuid
+import warnings
 from pathlib import Path
 from typing import Callable, Optional, Dict, Any, List
@@ -19,6 +19,7 @@ import pgserver
 import sqlalchemy as sql
 import yaml
 from sqlalchemy_utils.functions import database_exists, create_database, drop_database
+from tqdm import TqdmWarning
 import pixeltable.exceptions as excs
 from pixeltable import metadata
@@ -188,11 +189,21 @@ class Env:
         fh = logging.FileHandler(self._log_dir / self._logfilename, mode='w')
         fh.setFormatter(logging.Formatter(self._log_fmt_str))
         self._logger.addHandler(fh)
+        # configure sqlalchemy logging
         sql_logger = logging.getLogger('sqlalchemy.engine')
         sql_logger.setLevel(logging.INFO)
         sql_logger.addHandler(fh)
         sql_logger.propagate = False
+        # configure pyav logging
+        av_logfilename = self._logfilename.replace('.log', '_av.log')
+        av_fh = logging.FileHandler(self._log_dir / av_logfilename, mode='w')
+        av_fh.setFormatter(logging.Formatter(self._log_fmt_str))
+        av_logger = logging.getLogger('libav')
+        av_logger.addHandler(av_fh)
+        av_logger.propagate = False
         # empty tmp dir
         for path in glob.glob(f'{self._tmp_dir}/*'):
             os.remove(path)
@@ -229,6 +240,9 @@ class Env:
         self._set_up_runtime()
         self.log_to_stdout(False)
+        # Disable spurious warnings
+        warnings.simplefilter("ignore", category=TqdmWarning)
     def upgrade_metadata(self) -> None:
         metadata.upgrade_md(self._sa_engine)
@@ -320,6 +334,7 @@ class Env:
         check('torchvision')
         check('transformers')
         check('sentence_transformers')
+        check('yolox')
         check('boto3')
         check('pyarrow')
         check('spacy')  # TODO: deal with en-core-web-sm

pixeltable/exec/cache_prefetch_node.py CHANGED Viewed

@@ -89,7 +89,7 @@ class CachePrefetchNode(ExecNode):
         # preserve the file extension, if there is one
         extension = ''
         if parsed.path != '':
-            p = Path(urllib.parse.unquote(parsed.path))
+            p = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed.path)))
             extension = p.suffix
         tmp_path = env.Env.get().create_tmp_path(extension=extension)
         try:

pixeltable/exec/in_memory_data_node.py CHANGED Viewed

@@ -29,18 +29,21 @@ class InMemoryDataNode(ExecNode):
     def _open(self) -> None:
         """Create row batch and populate with self.input_rows"""
-        column_info = {info.col.name: info for info in self.row_builder.output_slot_idxs()}
+        column_info = {info.col.id: info for info in self.row_builder.output_slot_idxs()}
+        # exclude system columns
+        user_column_info = {info.col.name: info for _, info in column_info.items() if info.col.name is not None}
         # stored columns that are not computed
-        inserted_column_names = set([
-            info.col.name for info in self.row_builder.output_slot_idxs()
+        inserted_col_ids = set([
+            info.col.id for info in self.row_builder.output_slot_idxs()
             if info.col.is_stored and not info.col.is_computed
         ])
         self.output_rows = DataRowBatch(self.tbl, self.row_builder, len(self.input_rows))
         for row_idx, input_row in enumerate(self.input_rows):
             # populate the output row with the values provided in the input row
+            input_col_ids: List[int] = []
             for col_name, val in input_row.items():
-                col_info = column_info.get(col_name)
+                col_info = user_column_info.get(col_name)
                 assert col_info is not None
                 if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
@@ -49,11 +52,12 @@ class InMemoryDataNode(ExecNode):
                     open(path, 'wb').write(val)
                     val = path
                 self.output_rows[row_idx][col_info.slot_idx] = val
+                input_col_ids.append(col_info.col.id)
             # set the remaining stored non-computed columns to null
-            null_col_names = inserted_column_names - set(input_row.keys())
-            for col_name in null_col_names:
-                col_info = column_info.get(col_name)
+            null_col_ids = inserted_col_ids - set(input_col_ids)
+            for col_id in null_col_ids:
+                col_info = column_info.get(col_id)
                 assert col_info is not None
                 self.output_rows[row_idx][col_info.slot_idx] = None

pixeltable/exprs/comparison.py CHANGED Viewed

@@ -1,14 +1,14 @@
 from __future__ import annotations
 from typing import Optional, List, Any, Dict, Tuple
 import sqlalchemy as sql
-from .globals import ComparisonOperator
+from .data_row import DataRow
 from .expr import Expr
+from .globals import ComparisonOperator
 from .predicate import Predicate
-from .data_row import DataRow
 from .row_builder import RowBuilder
-import pixeltable.catalog as catalog
 class Comparison(Predicate):

pixeltable/exprs/data_row.py CHANGED Viewed

@@ -5,6 +5,8 @@ import urllib.parse
 import urllib.request
 from typing import Optional, List, Any, Tuple
+import sqlalchemy as sql
+import pgvector.sqlalchemy
 import PIL
 import numpy as np
@@ -110,7 +112,7 @@ class DataRow:
         return self.vals[index]
-    def get_stored_val(self, index: object) -> Any:
+    def get_stored_val(self, index: object, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
         """Return the value that gets stored in the db"""
         assert self.excs[index] is None
         if not self.has_val[index]:
@@ -125,6 +127,8 @@ class DataRow:
         if self.vals[index] is not None and index in self.array_slot_idxs:
             assert isinstance(self.vals[index], np.ndarray)
             np_array = self.vals[index]
+            if sa_col_type is not None and isinstance(sa_col_type, pgvector.sqlalchemy.Vector):
+                return np_array
             buffer = io.BytesIO()
             np.save(buffer, np_array)
             return buffer.getvalue()

pixeltable/exprs/literal.py CHANGED Viewed

@@ -1,13 +1,16 @@
 from __future__ import annotations
+import datetime
 from typing import Optional, List, Any, Dict, Tuple
 import sqlalchemy as sql
-from .expr import Expr
+import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
 from .data_row import DataRow
+from .expr import Expr
 from .row_builder import RowBuilder
-import pixeltable.catalog as catalog
-import pixeltable.type_system as ts
 class Literal(Expr):
     def __init__(self, val: Any, col_type: Optional[ts.ColumnType] = None):
@@ -46,9 +49,18 @@ class Literal(Expr):
         data_row[self.slot_idx] = self.val
     def _as_dict(self) -> Dict:
-        return {'val': self.val, **super()._as_dict()}
+        # For some types, we need to explictly record their type, because JSON does not know
+        # how to interpret them unambiguously
+        if self.col_type.is_timestamp_type():
+            return {'val': self.val.isoformat(), 'val_t': self.col_type._type.name, **super()._as_dict()}
+        else:
+            return {'val': self.val, **super()._as_dict()}
     @classmethod
     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
         assert 'val' in d
+        if 'val_t' in d:
+            val_t = d['val_t']
+            assert val_t == ts.ColumnType.Type.TIMESTAMP.name
+            return cls(datetime.datetime.fromisoformat(d['val']))
         return cls(d['val'])

pixeltable/exprs/row_builder.py CHANGED Viewed

@@ -54,14 +54,12 @@ class RowBuilder:
         target_exprs: List[Expr]  # exprs corresponding to target_slot_idxs
     def __init__(
-            self, output_exprs: List[Expr], columns: List[catalog.Column],
-            indices: List[Tuple[catalog.Column, func.Function]], input_exprs: List[Expr]
+            self, output_exprs: List[Expr], columns: List[catalog.Column], input_exprs: List[Expr]
     ):
         """
         Args:
             output_exprs: list of Exprs to be evaluated
             columns: list of columns to be materialized
-            indices: list of embeddings to be materialized (Tuple[indexed column, embedding function])
         """
         self.unique_exprs = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
@@ -73,7 +71,6 @@ class RowBuilder:
         # output exprs: all exprs the caller wants to materialize
         # - explicitly requested output_exprs
         # - values for computed columns
-        # - embedding values for indices
         resolve_cols = set(columns)
         self.output_exprs = [
             self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
@@ -97,21 +94,6 @@ class RowBuilder:
                 ref = self._record_unique_expr(ref, recursive=False)
                 self.add_table_column(col, ref.slot_idx)
-        # record indices; indexed by slot_idx
-        self.index_columns: List[catalog.Column] = []
-        for col, embedding_fn in indices:
-            # we assume that the parameter of the embedding function is a ref to an image column
-            assert col.col_type.is_image_type()
-            # construct expr to compute embedding; explicitly resize images to the required size
-            target_img_type = next(iter(embedding_fn.signature.parameters.values())).col_type
-            expr = embedding_fn(ColumnRef(col).resize(target_img_type.size))
-            expr = self._record_unique_expr(expr, recursive=True)
-            self.output_exprs.append(expr)
-            if len(self.index_columns) <= expr.slot_idx:
-                # pad to slot_idx
-                self.index_columns.extend([None] * (expr.slot_idx - len(self.index_columns) + 1))
-            self.index_columns[expr.slot_idx] = col
         # default eval ctx: all output exprs
         self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)
@@ -170,13 +152,6 @@ class RowBuilder:
         """Return ColumnSlotIdx for output columns"""
         return self.table_columns
-    def index_slot_idxs(self) -> List[ColumnSlotIdx]:
-        """Return ColumnSlotIdx for index columns"""
-        return [
-            ColumnSlotIdx(self.output_columns[i], i) for i in range(len(self.index_columns))
-            if self.output_columns[i] is not None
-        ]
     @property
     def num_materialized(self) -> int:
         return self.next_slot_idx
@@ -334,22 +309,15 @@ class RowBuilder:
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
                 exc_col_ids.add(col.id)
-                table_row[col.storage_name()] = None
-                table_row[col.errortype_storage_name()] = type(exc).__name__
-                table_row[col.errormsg_storage_name()] = str(exc)
+                table_row[col.store_name()] = None
+                table_row[col.errortype_store_name()] = type(exc).__name__
+                table_row[col.errormsg_store_name()] = str(exc)
             else:
-                val = data_row.get_stored_val(slot_idx)
-                table_row[col.storage_name()] = val
+                val = data_row.get_stored_val(slot_idx, col.sa_col.type)
+                table_row[col.store_name()] = val
                 # we unfortunately need to set these, even if there are no errors
-                table_row[col.errortype_storage_name()] = None
-                table_row[col.errormsg_storage_name()] = None
-        for slot_idx, col in enumerate(self.index_columns):
-            if col is None:
-                continue
-            # don't use get_stored_val() here, we need to pass in the ndarray
-            val = data_row[slot_idx]
-            table_row[col.index_storage_name()] = val
+                table_row[col.errortype_store_name()] = None
+                table_row[col.errormsg_store_name()] = None
         return table_row, num_excs

pixeltable/ext/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""
+Extended integrations for Pixeltable. This package contains experimental or demonstration features that
+are not intended for production use. Long-term support cannot be guaranteed, usually because the features
+have dependencies whose future support is unclear.
+"""

pixeltable/ext/functions/yolox.py ADDED Viewed

@@ -0,0 +1,92 @@
+import logging
+from pathlib import Path
+from typing import Iterable, Iterator
+from urllib.request import urlretrieve
+import PIL.Image
+import numpy as np
+import torch
+from yolox.data import ValTransform
+from yolox.exp import get_exp, Exp
+from yolox.models import YOLOX
+from yolox.utils import postprocess
+import pixeltable as pxt
+from pixeltable import env
+from pixeltable.func import Batch
+from pixeltable.functions.util import resolve_torch_device
+_logger = logging.getLogger('pixeltable')
+@pxt.udf(batch_size=4)
+def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
+    """
+    Runs the specified YOLOX object detection model on an image.
+    YOLOX support is part of the `pixeltable.ext` package: long-term support is not guaranteed, and it is not
+    intended for use in production applications.
+    Parameters:
+    - `model_id` - one of: `yolox_nano, `yolox_tiny`, `yolox_s`, `yolox_m`, `yolox_l`, `yolox_x`
+    - `threshold` - the threshold for object detection
+    """
+    model, exp = _lookup_model(model_id, 'cpu')
+    image_tensors = list(_images_to_tensors(images, exp))
+    batch_tensor = torch.stack(image_tensors)
+    with torch.no_grad():
+        output_tensor = model(batch_tensor)
+    outputs = postprocess(
+        output_tensor, 80, threshold, exp.nmsthre, class_agnostic=False
+    )
+    results: list[dict] = []
+    for image in images:
+        ratio = min(exp.test_size[0] / image.height, exp.test_size[1] / image.width)
+        if outputs[0] is None:
+            results.append({'bboxes': [], 'scores': [], 'labels': []})
+        else:
+            results.append({
+                'bboxes': [(output[:4] / ratio).tolist() for output in outputs[0]],
+                'scores': [output[4].item() * output[5].item() for output in outputs[0]],
+                'labels': [int(output[6]) for output in outputs[0]]
+            })
+    return results
+def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: Exp) -> Iterator[torch.Tensor]:
+    for image in images:
+        image_transform, _ = _val_transform(np.array(image), None, exp.test_size)
+        yield torch.from_numpy(image_transform)
+def _lookup_model(model_id: str, device: str) -> (YOLOX, Exp):
+    key = (model_id, device)
+    if key in _model_cache:
+        return _model_cache[key]
+    weights_url = f'https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/{model_id}.pth'
+    weights_file = Path(f'{env.Env.get().tmp_dir}/{model_id}.pth')
+    if not weights_file.exists():
+        _logger.info(f'Downloading weights for YOLOX model {model_id}: from {weights_url} -> {weights_file}')
+        urlretrieve(weights_url, weights_file)
+    exp = get_exp(exp_name=model_id)
+    model = exp.get_model().to(device)
+    model.eval()
+    model.head.training = False
+    model.training = False
+    # Load in the weights from training
+    weights = torch.load(weights_file, map_location=torch.device(device))
+    model.load_state_dict(weights['model'])
+    _model_cache[key] = (model, exp)
+    return model, exp
+_model_cache = {}
+_val_transform = ValTransform(legacy=False)

pixeltable/func/aggregate_function.py CHANGED Viewed

@@ -3,13 +3,14 @@ from __future__ import annotations
 import abc
 import importlib
 import inspect
-from typing import Optional, Any, Type, List, Dict
+from typing import Optional, Any, Type, List, Dict, Callable
 import itertools
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
 from .function import Function
 from .signature import Signature, Parameter
+from .globals import validate_symbol_path
 class Aggregator(abc.ABC):
@@ -136,8 +137,7 @@ def uda(
         update_types: List[ts.ColumnType],
         init_types: Optional[List[ts.ColumnType]] = None,
         requires_order_by: bool = False, allows_std_agg: bool = True, allows_window: bool = False,
-        name: Optional[str] = None
-) -> Type[Aggregator]:
+) -> Callable:
     """Decorator for user-defined aggregate functions.
     The decorated class must inherit from Aggregator and implement the following methods:
@@ -155,14 +155,11 @@ def uda(
     - requires_order_by: if True, the first parameter to the function is the order-by expression
     - allows_std_agg: if True, the function can be used as a standard aggregate function w/o a window
     - allows_window: if True, the function can be used with a window
-    - name: name of the AggregateFunction instance; if None, the class name is used
     """
-    if name is not None and not name.isidentifier():
-        raise excs.Error(f'Invalid name: {name}')
     if init_types is None:
         init_types = []
-    def decorator(cls: Type[Aggregator]) -> Type[Aggregator]:
+    def decorator(cls: Type[Aggregator]) -> Type[Function]:
         # validate type parameters
         num_init_params = len(inspect.signature(cls.__init__).parameters) - 1
         if num_init_params > 0:
@@ -178,17 +175,20 @@ def uda(
         assert value_type is not None
         # the AggregateFunction instance resides in the same module as cls
-        module_path = cls.__module__
-        nonlocal name
-        name = name or cls.__name__
-        instance_path = f'{module_path}.{name}'
+        class_path = f'{cls.__module__}.{cls.__qualname__}'
+        # nonlocal name
+        # name = name or cls.__name__
+        # instance_path_elements = class_path.split('.')[:-1] + [name]
+        # instance_path = '.'.join(instance_path_elements)
         # create the corresponding AggregateFunction instance
         instance = AggregateFunction(
-            cls, instance_path, init_types, update_types, value_type, requires_order_by, allows_std_agg, allows_window)
-        module = importlib.import_module(module_path)
-        setattr(module, name, instance)
+            cls, class_path, init_types, update_types, value_type, requires_order_by, allows_std_agg, allows_window)
+        # do the path validation at the very end, in order to be able to write tests for the other failure cases
+        validate_symbol_path(class_path)
+        #module = importlib.import_module(cls.__module__)
+        #setattr(module, name, instance)
-        return cls
+        return instance
     return decorator

pixeltable/func/expr_template_function.py CHANGED Viewed

@@ -50,9 +50,17 @@ class ExprTemplateFunction(Function):
         bound_args.update(
             {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
         result = self.expr.copy()
+        import pixeltable.exprs as exprs
         for param_name, arg in bound_args.items():
             param_expr = self.param_exprs_by_name[param_name]
-            result = result.substitute(param_expr, arg)
+            if not isinstance(arg, exprs.Expr):
+                # TODO: use the available param_expr.col_type
+                arg_expr = exprs.Expr.from_object(arg)
+                if arg_expr is None:
+                    raise excs.Error(f'{self.self_name}(): cannot convert argument {arg} to a Pixeltable expression')
+            else:
+                arg_expr = arg
+            result = result.substitute(param_expr, arg_expr)
         import pixeltable.exprs as exprs
         assert not result.contains(exprs.Variable)
         return result

pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl