PyPI - pixeltable - Versions diffs - 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl - Mend

pixeltable 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (63)

pixeltable/catalog/column.py +26 -49
pixeltable/catalog/insertable_table.py +7 -4
pixeltable/catalog/table.py +163 -57
pixeltable/catalog/table_version.py +416 -140
pixeltable/catalog/table_version_path.py +2 -2
pixeltable/client.py +72 -6
pixeltable/dataframe.py +65 -21
pixeltable/env.py +52 -53
pixeltable/exec/cache_prefetch_node.py +1 -1
pixeltable/exec/in_memory_data_node.py +11 -7
pixeltable/exprs/comparison.py +3 -3
pixeltable/exprs/data_row.py +5 -1
pixeltable/exprs/literal.py +16 -4
pixeltable/exprs/row_builder.py +8 -40
pixeltable/ext/__init__.py +5 -0
pixeltable/ext/functions/yolox.py +92 -0
pixeltable/func/aggregate_function.py +15 -15
pixeltable/func/expr_template_function.py +9 -1
pixeltable/func/globals.py +24 -14
pixeltable/func/signature.py +18 -12
pixeltable/func/udf.py +7 -2
pixeltable/functions/__init__.py +9 -9
pixeltable/functions/eval.py +7 -8
pixeltable/functions/fireworks.py +10 -37
pixeltable/functions/huggingface.py +47 -19
pixeltable/functions/openai.py +192 -24
pixeltable/functions/together.py +104 -9
pixeltable/functions/util.py +11 -0
pixeltable/index/__init__.py +2 -0
pixeltable/index/base.py +49 -0
pixeltable/index/embedding_index.py +95 -0
pixeltable/metadata/schema.py +45 -22
pixeltable/plan.py +15 -34
pixeltable/store.py +38 -41
pixeltable/tests/conftest.py +8 -14
pixeltable/tests/ext/test_yolox.py +21 -0
pixeltable/tests/functions/test_fireworks.py +43 -0
pixeltable/tests/functions/test_functions.py +60 -0
pixeltable/tests/{test_functions.py → functions/test_huggingface.py} +7 -143
pixeltable/tests/functions/test_openai.py +162 -0
pixeltable/tests/functions/test_together.py +112 -0
pixeltable/tests/test_component_view.py +14 -5
pixeltable/tests/test_dataframe.py +23 -22
pixeltable/tests/test_exprs.py +99 -102
pixeltable/tests/test_function.py +51 -43
pixeltable/tests/test_index.py +138 -0
pixeltable/tests/test_migration.py +2 -1
pixeltable/tests/test_snapshot.py +24 -1
pixeltable/tests/test_table.py +205 -26
pixeltable/tests/test_types.py +30 -0
pixeltable/tests/test_video.py +16 -16
pixeltable/tests/test_view.py +5 -0
pixeltable/tests/utils.py +171 -14
pixeltable/tool/create_test_db_dump.py +16 -0
pixeltable/type_system.py +77 -128
pixeltable/utils/arrow.py +98 -0
pixeltable/utils/hf_datasets.py +157 -0
pixeltable/utils/parquet.py +68 -27
pixeltable/utils/pytorch.py +16 -97
{pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/METADATA +35 -28
{pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/RECORD +63 -50
{pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
{pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0

pixeltable/catalog/table_version_path.py CHANGED Viewed

@@ -101,8 +101,8 @@ class TableVersionPath:
         return DataFrame(self).__getitem__(index)
     def columns(self) -> List[Column]:
-        """Return all columns visible in this tbl version path, including columns from bases"""
-        result = self.tbl_version.cols.copy()
+        """Return all user columns visible in this tbl version path, including columns from bases"""
+        result = list(self.tbl_version.cols_by_name.values())
         if self.base is not None:
             base_cols = self.base.columns()
             # we only include base columns that don't conflict with one of our column names

pixeltable/client.py CHANGED Viewed

@@ -2,12 +2,11 @@ from typing import List, Optional, Dict, Type, Any, Union
 import pandas as pd
 import logging
 import dataclasses
-from uuid import UUID
-from collections import defaultdict
 import sqlalchemy as sql
 import sqlalchemy.orm as orm
+import pixeltable
 from pixeltable.metadata import schema
 from pixeltable.env import Env
 import pixeltable.func as func
@@ -16,6 +15,10 @@ from pixeltable import exceptions as excs
 from pixeltable.exprs import Predicate
 from pixeltable.iterators import ComponentIterator
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    import datasets
 __all__ = [
     'Client',
 ]
@@ -129,10 +132,6 @@ class Client:
             Create a table with an int and a string column:
             >>> table = cl.create_table('my_table', schema={'col1': IntType(), 'col2': StringType()})
-            Create a table with a single indexed image column:
-            >>> table = cl.create_table('my_table', schema={'col1': {'type': ImageType(), 'indexed': True}})
         """
         path = catalog.Path(path_str)
         self.catalog.paths.check_is_valid(path, expected=None)
@@ -155,6 +154,73 @@ class Client:
         _logger.info(f'Created table `{path_str}`.')
         return tbl
+    def import_parquet(
+        self,
+        table_path: str,
+        *,
+        parquet_path: str,
+        schema_override: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ) -> catalog.InsertableTable:
+        """Create a new `InsertableTable` from a Parquet file or set of files. Requires pyarrow to be installed.
+        Args:
+            path_str: Path to the table within pixeltable.
+            parquet_path: Path to an individual Parquet file or directory of Parquet files.
+            schema_override: Optional dictionary mapping column names to column type to override the default
+                            schema inferred from the Parquet file. The column type should be a pixeltable ColumnType.
+                            For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
+                            Any fields not provided explicitly will map to types with `pixeltable.utils.parquet.parquet_schema_to_pixeltable_schema`
+            kwargs: Additional arguments to pass to `Client.create_table`.
+        Returns:
+            The newly created table. The table will have loaded the data from the Parquet file(s).
+        """
+        from pixeltable.utils import parquet
+        return parquet.import_parquet(
+            self,
+            table_path=table_path,
+            parquet_path=parquet_path,
+            schema_override=schema_override,
+            **kwargs,
+        )
+    def import_huggingface_dataset(
+        self,
+        table_path: str,
+        dataset: Union['datasets.Dataset', 'datasets.DatasetDict'],
+        *,
+        column_name_for_split: Optional[str] = 'split',
+        schema_override: Optional[Dict[str, Any]] = None,
+        **kwargs
+    ) -> catalog.InsertableTable:
+        """Create a new `InsertableTable` from a Huggingface dataset, or dataset dict with multiple splits.
+            Requires datasets library to be installed.
+        Args:
+            path_str: Path to the table.
+            dataset: Huggingface datasts.Dataset or datasts.DatasetDict to insert into the table.
+            column_name_for_split: column name to use for split information. If None, no split information will be stored.
+            schema_override: Optional dictionary mapping column names to column type to override the corresponding defaults from
+            `pixeltable.utils.hf_datasets.huggingface_schema_to_pixeltable_schema`. The column type should be a pixeltable ColumnType.
+            For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
+            kwargs: Additional arguments to pass to `create_table`.
+        Returns:
+            The newly created table. The table will have loaded the data from the dataset.
+        """
+        from pixeltable.utils import hf_datasets
+        return hf_datasets.import_huggingface_dataset(
+            self,
+            table_path,
+            dataset,
+            column_name_for_split=column_name_for_split,
+            schema_override=schema_override,
+            **kwargs,
+        )
     def create_view(
             self, path_str: str, base: catalog.Table, *, schema: Optional[Dict[str, Any]] = None,
             filter: Optional[Predicate] = None,

pixeltable/dataframe.py CHANGED Viewed

@@ -11,6 +11,8 @@ import traceback
 from pathlib import Path
 from typing import List, Optional, Any, Dict, Generator, Tuple, Set
+import PIL.Image
+import cv2
 import pandas as pd
 import pandas.io.formats.style
 import sqlalchemy as sql
@@ -31,15 +33,6 @@ __all__ = [
 _logger = logging.getLogger('pixeltable')
-def _format_img(img: object) -> str:
-    """
-    Create <img> tag for Image object.
-    """
-    assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
-    with io.BytesIO() as buffer:
-        img.save(buffer, 'jpeg')
-        img_base64 = base64.b64encode(buffer.getvalue()).decode()
-        return f'<div style="width:200px;"><img src="data:image/jpeg;base64,{img_base64}" width="200" /></div>'
 def _create_source_tag(file_path: str) -> str:
     abs_path = Path(file_path)
@@ -50,21 +43,17 @@ def _create_source_tag(file_path: str) -> str:
     mime_attr = f'type="{mime}"' if mime is not None else ''
     return f'<source src="{src_url}" {mime_attr} />'
-def _format_video(file_path: str) -> str:
-    return f'<video controls>{_create_source_tag(file_path)}</video>'
-def _format_audio(file_path: str) -> str:
-    return f'<audio controls>{_create_source_tag(file_path)}</audio>'
 class DataFrameResultSet:
     def __init__(self, rows: List[List[Any]], col_names: List[str], col_types: List[ColumnType]):
         self._rows = rows
         self._col_names = col_names
         self._col_types = col_types
         self._formatters = {
-            ts.ImageType: _format_img,
-            ts.VideoType: _format_video,
-            ts.AudioType: _format_audio,
+            ts.ImageType: self._format_img,
+            ts.VideoType: self._format_video,
+            ts.AudioType: self._format_audio,
         }
     def __len__(self) -> int:
@@ -85,9 +74,7 @@ class DataFrameResultSet:
             for col_name, col_type in zip(self._col_names, self._col_types)
             if col_type.__class__ in self._formatters
         }
-        # TODO: why does mypy complain about formatters having an incorrect type?
-        return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)  # type: ignore[arg-type]
+        return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)
     def __str__(self) -> str:
         return self.to_pandas().to_string()
@@ -102,6 +89,64 @@ class DataFrameResultSet:
     def _row_to_dict(self, row_idx: int) -> Dict[str, Any]:
         return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
+    # Formatters
+    def _format_img(self, img: Image.Image) -> str:
+        """
+        Create <img> tag for Image object.
+        """
+        assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
+        # Try to make it look decent in a variety of display scenarios
+        if len(self._rows) > 1:
+            width = 240  # Multiple rows: display small images
+        elif len(self._col_names) > 1:
+            width = 480  # Multiple columns: display medium images
+        else:
+            width = 640  # A single image: larger display
+        with io.BytesIO() as buffer:
+            img.save(buffer, 'jpeg')
+            img_base64 = base64.b64encode(buffer.getvalue()).decode()
+            return f'''
+            <div style="width:{width}px;">
+                <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
+            </div>
+            '''
+    def _format_video(self, file_path: str) -> str:
+        thumb_tag = ""
+        # Attempt to extract the first frame of the video to use as a thumbnail,
+        # so that the notebook can be exported as HTML and viewed in contexts where
+        # the video itself is not accessible.
+        # TODO(aaron-siegel): If the video is backed by a concrete external URL,
+        # should we link to that instead?
+        video_reader = cv2.VideoCapture(str(file_path))
+        if video_reader.isOpened():
+            status, img_array = video_reader.read()
+            if status:
+                img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
+                thumb = PIL.Image.fromarray(img_array)
+                with io.BytesIO() as buffer:
+                    thumb.save(buffer, 'jpeg')
+                    thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
+                    thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
+            video_reader.release()
+        if len(self._rows) > 1:
+            width = 320
+        elif len(self._col_names) > 1:
+            width = 480
+        else:
+            width = 800
+        return f'''
+        <div style="width:{width}px;">
+            <video controls width="{width}" {thumb_tag}>
+                {_create_source_tag(file_path)}
+            </video>
+        </div>
+        '''
+    def _format_audio(self, file_path: str) -> str:
+        return f'<audio controls>{_create_source_tag(file_path)}</audio>'
     def __getitem__(self, index: Any) -> Any:
         if isinstance(index, str):
             if index not in self._col_names:
@@ -173,7 +218,6 @@ class AnalysisInfo:
             self.filter.release()
 class DataFrame:
     def __init__(
             self, tbl: catalog.TableVersionPath,

pixeltable/env.py CHANGED Viewed

@@ -1,33 +1,29 @@
 from __future__ import annotations
 import datetime
-import os
-from typing import Optional, Dict, Any, List
-from pathlib import Path
-import sqlalchemy as sql
-import uuid
+import glob
+import http.server
 import importlib
 import importlib.util
-import http.server
+import logging
+import os
 import socketserver
+import sys
 import threading
-import typing
 import uuid
+import warnings
 from pathlib import Path
-from typing import Optional, Dict, Any, List
+from typing import Callable, Optional, Dict, Any, List
+import pgserver
+import sqlalchemy as sql
 import yaml
 from sqlalchemy_utils.functions import database_exists, create_database, drop_database
-import pgserver
-import logging
-import sys
-import glob
+from tqdm import TqdmWarning
-from pixeltable import metadata
 import pixeltable.exceptions as excs
+from pixeltable import metadata
-if typing.TYPE_CHECKING:
-    import openai
 class Env:
     """
@@ -59,12 +55,12 @@ class Env:
         # package name -> version; version == []: package is installed, but we haven't determined the version yet
         self._installed_packages: Dict[str, Optional[List[int]]] = {}
         self._nos_client: Optional[Any] = None
-        self._openai_client: Optional['openai.OpenAI'] = None
-        self._has_together_client: bool = False
         self._spacy_nlp: Optional[Any] = None  # spacy.Language
         self._httpd: Optional[socketserver.TCPServer] = None
         self._http_address: Optional[str] = None
+        self._registered_clients: dict[str, Any] = {}
         # logging-related state
         self._logger = logging.getLogger('pixeltable')
         self._logger.setLevel(logging.DEBUG)  # allow everything to pass, we filter in _log_filter()
@@ -193,11 +189,21 @@ class Env:
         fh = logging.FileHandler(self._log_dir / self._logfilename, mode='w')
         fh.setFormatter(logging.Formatter(self._log_fmt_str))
         self._logger.addHandler(fh)
+        # configure sqlalchemy logging
         sql_logger = logging.getLogger('sqlalchemy.engine')
         sql_logger.setLevel(logging.INFO)
         sql_logger.addHandler(fh)
         sql_logger.propagate = False
+        # configure pyav logging
+        av_logfilename = self._logfilename.replace('.log', '_av.log')
+        av_fh = logging.FileHandler(self._log_dir / av_logfilename, mode='w')
+        av_fh.setFormatter(logging.Formatter(self._log_fmt_str))
+        av_logger = logging.getLogger('libav')
+        av_logger.addHandler(av_fh)
+        av_logger.propagate = False
         # empty tmp dir
         for path in glob.glob(f'{self._tmp_dir}/*'):
             os.remove(path)
@@ -234,6 +240,9 @@ class Env:
         self._set_up_runtime()
         self.log_to_stdout(False)
+        # Disable spurious warnings
+        warnings.simplefilter("ignore", category=TqdmWarning)
     def upgrade_metadata(self) -> None:
         metadata.upgrade_md(self._sa_engine)
@@ -256,31 +265,32 @@ class Env:
         from pixeltable.functions.util import create_nos_modules
         _ = create_nos_modules()
-    def _create_openai_client(self) -> None:
-        if not self.is_installed_package('openai'):
-            raise excs.Error('OpenAI client not initialized (cannot find package `openai`: `pip install openai`?)')
-        import openai
-        if 'openai' in self._config and 'api_key' in self._config['openai']:
-            api_key = self._config['openai']['api_key']
-        else:
-            api_key = os.environ.get('OPENAI_API_KEY')
-        if api_key is None or api_key == '':
-            raise excs.Error('OpenAI client not initialized (no API key configured).')
-        self._openai_client = openai.OpenAI(api_key=api_key)
-        self._logger.info('Initialized OpenAI client.')
+    def get_client(self, name: str, init: Callable, environ: Optional[str] = None) -> Any:
+        """
+        Gets the client with the specified name, using `init` to construct one if necessary.
-    def _create_together_client(self) -> None:
-        if 'together' in self._config and 'api_key' in self._config['together']:
-            api_key = self._config['together']['api_key']
+        - name: The name of the client
+        - init: A `Callable` with signature `fn(api_key: str) -> Any` that constructs a client object
+        - environ: The name of the environment variable to use for the API key, if no API key is found in config
+            (defaults to f'{name.upper()}_API_KEY')
+        """
+        if name in self._registered_clients:
+            return self._registered_clients[name]
+        if environ is None:
+            environ = f'{name.upper()}_API_KEY'
+        if name in self._config and 'api_key' in self._config[name]:
+            api_key = self._config[name]['api_key']
         else:
-            api_key = os.environ.get('TOGETHER_API_KEY')
+            api_key = os.environ.get(environ)
         if api_key is None or api_key == '':
-            self._logger.info('Together client not initialized (no API key configured).')
-            return
-        import together
-        self._logger.info('Initializing Together client.')
-        together.api_key = api_key
-        self._has_together_client = True
+            raise excs.Error(f'`{name}` client not initialized (no API key configured).')
+        client = init(api_key)
+        self._registered_clients[name] = client
+        self._logger.info(f'Initialized `{name}` client.')
+        return client
     def _start_web_server(self) -> None:
         """
@@ -319,10 +329,12 @@ class Env:
             else:
                 self._installed_packages[package] = None
+        check('datasets')
         check('torch')
         check('torchvision')
         check('transformers')
         check('sentence_transformers')
+        check('yolox')
         check('boto3')
         check('pyarrow')
         check('spacy')  # TODO: deal with en-core-web-sm
@@ -332,8 +344,6 @@ class Env:
         check('tiktoken')
         check('openai')
         check('together')
-        if self.is_installed_package('together'):
-            self._create_together_client()
         check('fireworks')
         check('nos')
         if self.is_installed_package('nos'):
@@ -399,17 +409,6 @@ class Env:
     def nos_client(self) -> Any:
         return self._nos_client
-    @property
-    def openai_client(self) -> 'openai.OpenAI':
-        if self._openai_client is None:
-            self._create_openai_client()
-        assert self._openai_client is not None
-        return self._openai_client
-    @property
-    def has_together_client(self) -> bool:
-        return self._has_together_client
     @property
     def spacy_nlp(self) -> Any:
         assert self._spacy_nlp is not None

pixeltable/exec/cache_prefetch_node.py CHANGED Viewed

@@ -89,7 +89,7 @@ class CachePrefetchNode(ExecNode):
         # preserve the file extension, if there is one
         extension = ''
         if parsed.path != '':
-            p = Path(urllib.parse.unquote(parsed.path))
+            p = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed.path)))
             extension = p.suffix
         tmp_path = env.Env.get().create_tmp_path(extension=extension)
         try:

pixeltable/exec/in_memory_data_node.py CHANGED Viewed

@@ -29,18 +29,21 @@ class InMemoryDataNode(ExecNode):
     def _open(self) -> None:
         """Create row batch and populate with self.input_rows"""
-        column_info = {info.col.name: info for info in self.row_builder.output_slot_idxs()}
+        column_info = {info.col.id: info for info in self.row_builder.output_slot_idxs()}
+        # exclude system columns
+        user_column_info = {info.col.name: info for _, info in column_info.items() if info.col.name is not None}
         # stored columns that are not computed
-        inserted_column_names = set([
-            info.col.name for info in self.row_builder.output_slot_idxs()
+        inserted_col_ids = set([
+            info.col.id for info in self.row_builder.output_slot_idxs()
             if info.col.is_stored and not info.col.is_computed
         ])
         self.output_rows = DataRowBatch(self.tbl, self.row_builder, len(self.input_rows))
         for row_idx, input_row in enumerate(self.input_rows):
             # populate the output row with the values provided in the input row
+            input_col_ids: List[int] = []
             for col_name, val in input_row.items():
-                col_info = column_info.get(col_name)
+                col_info = user_column_info.get(col_name)
                 assert col_info is not None
                 if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
@@ -49,11 +52,12 @@ class InMemoryDataNode(ExecNode):
                     open(path, 'wb').write(val)
                     val = path
                 self.output_rows[row_idx][col_info.slot_idx] = val
+                input_col_ids.append(col_info.col.id)
             # set the remaining stored non-computed columns to null
-            null_col_names = inserted_column_names - set(input_row.keys())
-            for col_name in null_col_names:
-                col_info = column_info.get(col_name)
+            null_col_ids = inserted_col_ids - set(input_col_ids)
+            for col_id in null_col_ids:
+                col_info = column_info.get(col_id)
                 assert col_info is not None
                 self.output_rows[row_idx][col_info.slot_idx] = None

pixeltable/exprs/comparison.py CHANGED Viewed

@@ -1,14 +1,14 @@
 from __future__ import annotations
 from typing import Optional, List, Any, Dict, Tuple
 import sqlalchemy as sql
-from .globals import ComparisonOperator
+from .data_row import DataRow
 from .expr import Expr
+from .globals import ComparisonOperator
 from .predicate import Predicate
-from .data_row import DataRow
 from .row_builder import RowBuilder
-import pixeltable.catalog as catalog
 class Comparison(Predicate):

pixeltable/exprs/data_row.py CHANGED Viewed

@@ -5,6 +5,8 @@ import urllib.parse
 import urllib.request
 from typing import Optional, List, Any, Tuple
+import sqlalchemy as sql
+import pgvector.sqlalchemy
 import PIL
 import numpy as np
@@ -110,7 +112,7 @@ class DataRow:
         return self.vals[index]
-    def get_stored_val(self, index: object) -> Any:
+    def get_stored_val(self, index: object, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
         """Return the value that gets stored in the db"""
         assert self.excs[index] is None
         if not self.has_val[index]:
@@ -125,6 +127,8 @@ class DataRow:
         if self.vals[index] is not None and index in self.array_slot_idxs:
             assert isinstance(self.vals[index], np.ndarray)
             np_array = self.vals[index]
+            if sa_col_type is not None and isinstance(sa_col_type, pgvector.sqlalchemy.Vector):
+                return np_array
             buffer = io.BytesIO()
             np.save(buffer, np_array)
             return buffer.getvalue()

pixeltable/exprs/literal.py CHANGED Viewed

@@ -1,13 +1,16 @@
 from __future__ import annotations
+import datetime
 from typing import Optional, List, Any, Dict, Tuple
 import sqlalchemy as sql
-from .expr import Expr
+import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
 from .data_row import DataRow
+from .expr import Expr
 from .row_builder import RowBuilder
-import pixeltable.catalog as catalog
-import pixeltable.type_system as ts
 class Literal(Expr):
     def __init__(self, val: Any, col_type: Optional[ts.ColumnType] = None):
@@ -46,9 +49,18 @@ class Literal(Expr):
         data_row[self.slot_idx] = self.val
     def _as_dict(self) -> Dict:
-        return {'val': self.val, **super()._as_dict()}
+        # For some types, we need to explictly record their type, because JSON does not know
+        # how to interpret them unambiguously
+        if self.col_type.is_timestamp_type():
+            return {'val': self.val.isoformat(), 'val_t': self.col_type._type.name, **super()._as_dict()}
+        else:
+            return {'val': self.val, **super()._as_dict()}
     @classmethod
     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
         assert 'val' in d
+        if 'val_t' in d:
+            val_t = d['val_t']
+            assert val_t == ts.ColumnType.Type.TIMESTAMP.name
+            return cls(datetime.datetime.fromisoformat(d['val']))
         return cls(d['val'])

pixeltable/exprs/row_builder.py CHANGED Viewed

@@ -54,14 +54,12 @@ class RowBuilder:
         target_exprs: List[Expr]  # exprs corresponding to target_slot_idxs
     def __init__(
-            self, output_exprs: List[Expr], columns: List[catalog.Column],
-            indices: List[Tuple[catalog.Column, func.Function]], input_exprs: List[Expr]
+            self, output_exprs: List[Expr], columns: List[catalog.Column], input_exprs: List[Expr]
     ):
         """
         Args:
             output_exprs: list of Exprs to be evaluated
             columns: list of columns to be materialized
-            indices: list of embeddings to be materialized (Tuple[indexed column, embedding function])
         """
         self.unique_exprs = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
@@ -73,7 +71,6 @@ class RowBuilder:
         # output exprs: all exprs the caller wants to materialize
         # - explicitly requested output_exprs
         # - values for computed columns
-        # - embedding values for indices
         resolve_cols = set(columns)
         self.output_exprs = [
             self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
@@ -97,21 +94,6 @@ class RowBuilder:
                 ref = self._record_unique_expr(ref, recursive=False)
                 self.add_table_column(col, ref.slot_idx)
-        # record indices; indexed by slot_idx
-        self.index_columns: List[catalog.Column] = []
-        for col, embedding_fn in indices:
-            # we assume that the parameter of the embedding function is a ref to an image column
-            assert col.col_type.is_image_type()
-            # construct expr to compute embedding; explicitly resize images to the required size
-            target_img_type = next(iter(embedding_fn.signature.parameters.values())).col_type
-            expr = embedding_fn(ColumnRef(col).resize(target_img_type.size))
-            expr = self._record_unique_expr(expr, recursive=True)
-            self.output_exprs.append(expr)
-            if len(self.index_columns) <= expr.slot_idx:
-                # pad to slot_idx
-                self.index_columns.extend([None] * (expr.slot_idx - len(self.index_columns) + 1))
-            self.index_columns[expr.slot_idx] = col
         # default eval ctx: all output exprs
         self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)
@@ -170,13 +152,6 @@ class RowBuilder:
         """Return ColumnSlotIdx for output columns"""
         return self.table_columns
-    def index_slot_idxs(self) -> List[ColumnSlotIdx]:
-        """Return ColumnSlotIdx for index columns"""
-        return [
-            ColumnSlotIdx(self.output_columns[i], i) for i in range(len(self.index_columns))
-            if self.output_columns[i] is not None
-        ]
     @property
     def num_materialized(self) -> int:
         return self.next_slot_idx
@@ -334,22 +309,15 @@ class RowBuilder:
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
                 exc_col_ids.add(col.id)
-                table_row[col.storage_name()] = None
-                table_row[col.errortype_storage_name()] = type(exc).__name__
-                table_row[col.errormsg_storage_name()] = str(exc)
+                table_row[col.store_name()] = None
+                table_row[col.errortype_store_name()] = type(exc).__name__
+                table_row[col.errormsg_store_name()] = str(exc)
             else:
-                val = data_row.get_stored_val(slot_idx)
-                table_row[col.storage_name()] = val
+                val = data_row.get_stored_val(slot_idx, col.sa_col.type)
+                table_row[col.store_name()] = val
                 # we unfortunately need to set these, even if there are no errors
-                table_row[col.errortype_storage_name()] = None
-                table_row[col.errormsg_storage_name()] = None
-        for slot_idx, col in enumerate(self.index_columns):
-            if col is None:
-                continue
-            # don't use get_stored_val() here, we need to pass in the ndarray
-            val = data_row[slot_idx]
-            table_row[col.index_storage_name()] = val
+                table_row[col.errortype_store_name()] = None
+                table_row[col.errormsg_store_name()] = None
         return table_row, num_excs

pixeltable/ext/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""
+Extended integrations for Pixeltable. This package contains experimental or demonstration features that
+are not intended for production use. Long-term support cannot be guaranteed, usually because the features
+have dependencies whose future support is unclear.
+"""

pixeltable 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl