pixeltable 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (39)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +106 -71
  3. pixeltable/catalog/path.py +59 -20
  4. pixeltable/catalog/schema_object.py +1 -0
  5. pixeltable/catalog/table.py +6 -0
  6. pixeltable/catalog/table_version.py +2 -1
  7. pixeltable/catalog/view.py +21 -10
  8. pixeltable/config.py +12 -4
  9. pixeltable/dataframe.py +57 -1
  10. pixeltable/env.py +25 -13
  11. pixeltable/exec/aggregation_node.py +1 -1
  12. pixeltable/exec/cache_prefetch_node.py +2 -6
  13. pixeltable/exec/component_iteration_node.py +4 -3
  14. pixeltable/exec/data_row_batch.py +10 -53
  15. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  16. pixeltable/exec/in_memory_data_node.py +13 -11
  17. pixeltable/exec/sql_node.py +6 -7
  18. pixeltable/exprs/data_row.py +13 -13
  19. pixeltable/exprs/row_builder.py +16 -4
  20. pixeltable/exprs/string_op.py +1 -1
  21. pixeltable/func/expr_template_function.py +1 -4
  22. pixeltable/functions/date.py +1 -1
  23. pixeltable/functions/math.py +1 -1
  24. pixeltable/functions/openai.py +8 -4
  25. pixeltable/functions/timestamp.py +6 -6
  26. pixeltable/globals.py +14 -10
  27. pixeltable/metadata/schema.py +1 -1
  28. pixeltable/plan.py +5 -14
  29. pixeltable/share/packager.py +13 -13
  30. pixeltable/store.py +9 -6
  31. pixeltable/type_system.py +2 -1
  32. pixeltable/utils/filecache.py +1 -1
  33. pixeltable/utils/http_server.py +2 -3
  34. pixeltable/utils/media_store.py +84 -39
  35. {pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/METADATA +1 -1
  36. {pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/RECORD +39 -39
  37. {pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/LICENSE +0 -0
  38. {pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/WHEEL +0 -0
  39. {pixeltable-0.4.4.dist-info → pixeltable-0.4.5.dist-info}/entry_points.txt +0 -0
pixeltable/dataframe.py CHANGED
@@ -8,9 +8,22 @@ import json
 import logging
 import traceback
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, AsyncIterator, Callable, Hashable, Iterator, NoReturn, Optional, Sequence, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncIterator,
+    Callable,
+    Hashable,
+    Iterator,
+    NoReturn,
+    Optional,
+    Sequence,
+    TypeVar,
+    Union,
+)
 
 import pandas as pd
+import pydantic
 import sqlalchemy as sql
 
 from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
@@ -32,6 +45,11 @@ _logger = logging.getLogger('pixeltable')
 
 
 class DataFrameResultSet:
+    _rows: list[list[Any]]
+    _col_names: list[str]
+    __schema: dict[str, ColumnType]
+    __formatter: Formatter
+
     def __init__(self, rows: list[list[Any]], schema: dict[str, ColumnType]):
         self._rows = rows
         self._col_names = list(schema.keys())
@@ -66,6 +84,44 @@ class DataFrameResultSet:
     def to_pandas(self) -> pd.DataFrame:
         return pd.DataFrame.from_records(self._rows, columns=self._col_names)
 
+    BaseModelT = TypeVar('BaseModelT', bound=pydantic.BaseModel)
+
+    def to_pydantic(self, model: type[BaseModelT]) -> Iterator[BaseModelT]:
+        """
+        Convert the DataFrameResultSet to a list of Pydantic model instances.
+
+        Args:
+            model: A Pydantic model class.
+
+        Returns:
+            An iterator over Pydantic model instances, one for each row in the result set.
+
+        Raises:
+            Error: If the row data doesn't match the model schema.
+        """
+        model_fields = model.model_fields
+        model_config = getattr(model, 'model_config', {})
+        forbid_extra_fields = model_config.get('extra') == 'forbid'
+
+        # schema validation
+        required_fields = {name for name, field in model_fields.items() if field.is_required()}
+        col_names = set(self._col_names)
+        missing_fields = required_fields - col_names
+        if len(missing_fields) > 0:
+            raise excs.Error(
+                f'Required model fields {missing_fields} are missing from result set columns {self._col_names}'
+            )
+        if forbid_extra_fields:
+            extra_fields = col_names - set(model_fields.keys())
+            if len(extra_fields) > 0:
+                raise excs.Error(f"Extra fields {extra_fields} are not allowed in model with extra='forbid'")
+
+        for row in self:
+            try:
+                yield model(**row)
+            except pydantic.ValidationError as e:
+                raise excs.Error(str(e)) from e
+
     def _row_to_dict(self, row_idx: int) -> dict[str, Any]:
         return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
 
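The new to_pydantic() validates the model's fields against the result-set schema up front, then yields one model instance per row. A minimal usage sketch against the public pixeltable API (the table name and schema below are made up for illustration):

    import pydantic
    import pixeltable as pxt

    class Film(pydantic.BaseModel):
        title: str
        year: int

    t = pxt.create_table('films', {'title': pxt.String, 'year': pxt.Int})
    t.insert([{'title': 'Jaws', 'year': 1975}, {'title': 'Alien', 'year': 1979}])

    result = t.select(t.title, t.year).collect()  # DataFrameResultSet
    for film in result.to_pydantic(Film):
        print(film.title, film.year)

Because to_pydantic() is a generator, a required model field missing from the columns fails immediately, while per-row validation errors surface only as rows are consumed.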
pixeltable/env.py CHANGED
@@ -13,6 +13,8 @@ import platform
 import shutil
 import sys
 import threading
+import types
+import typing
 import uuid
 import warnings
 from abc import abstractmethod
@@ -604,16 +606,26 @@ class Env:
 
         # Construct a client, retrieving each parameter from config.
 
-        init_kwargs: dict[str, str] = {}
-        for param in cl.param_names:
-            arg = Config.get().get_string_value(param, section=name)
-            if arg is not None and len(arg) > 0:
-                init_kwargs[param] = arg
-            else:
+        init_kwargs: dict[str, Any] = {}
+        for param in cl.params.values():
+            # Determine the type of the parameter for proper config parsing.
+            t = param.annotation
+            # Deference Optional[T]
+            if typing.get_origin(t) in (typing.Union, types.UnionType):
+                args = typing.get_args(t)
+                if args[0] is type(None):
+                    t = args[1]
+                elif args[1] is type(None):
+                    t = args[0]
+            assert isinstance(t, type), t
+            arg: Any = Config.get().get_value(param.name, t, section=name)
+            if arg is not None:
+                init_kwargs[param.name] = arg
+            elif param.default is inspect.Parameter.empty:
                 raise excs.Error(
-                    f'`{name}` client not initialized: parameter `{param}` is not configured.\n'
-                    f'To fix this, specify the `{name.upper()}_{param.upper()}` environment variable, '
-                    f'or put `{param.lower()}` in the `{name.lower()}` section of $PIXELTABLE_HOME/config.toml.'
+                    f'`{name}` client not initialized: parameter `{param.name}` is not configured.\n'
+                    f'To fix this, specify the `{name.upper()}_{param.name.upper()}` environment variable, '
+                    f'or put `{param.name.lower()}` in the `{name.lower()}` section of $PIXELTABLE_HOME/config.toml.'
                 )
 
         cl.client_obj = cl.init_fn(**init_kwargs)
@@ -624,7 +636,7 @@ class Env:
         """
         The http server root is the file system root.
         eg: /home/media/foo.mp4 is located at http://127.0.0.1:{port}/home/media/foo.mp4
-        in windows, the server will translate paths like http://127.0.0.1:{port}/c:/media/foo.mp4
+        On Windows, the server will translate paths like http://127.0.0.1:{port}/c:/media/foo.mp4
         This arrangement enables serving media hosted within _home,
         as well as external media inserted into pixeltable or produced by pixeltable.
         The port is chosen dynamically to prevent conflicts.
@@ -832,8 +844,8 @@ def register_client(name: str) -> Callable:
 
     def decorator(fn: Callable) -> None:
         sig = inspect.signature(fn)
-        param_names = list(sig.parameters.keys())
-        _registered_clients[name] = ApiClient(init_fn=fn, param_names=param_names)
+        params = dict(sig.parameters)
+        _registered_clients[name] = ApiClient(init_fn=fn, params=params)
 
     return decorator
 
@@ -844,7 +856,7 @@ _registered_clients: dict[str, ApiClient] = {}
 @dataclass
 class ApiClient:
     init_fn: Callable
-    param_names: list[str]
+    params: dict[str, inspect.Parameter]
     client_obj: Optional[Any] = None
 
 
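The rewritten client-initialization loop asks the config for a value of each parameter's annotated type, unwrapping Optional[T] first, and only treats a parameter as required when it has no default. A self-contained sketch of that unwrapping logic (standalone code, not pixeltable's; client_init is a stand-in signature; types.UnionType requires Python 3.10+):

    import inspect
    import types
    import typing
    from typing import Optional

    def unwrap_optional(t: object) -> object:
        # Optional[T] is Union[T, None]; 'T | None' is a types.UnionType on 3.10+
        if typing.get_origin(t) in (typing.Union, types.UnionType):
            args = typing.get_args(t)
            if args[0] is type(None):
                return args[1]
            if args[1] is type(None):
                return args[0]
        return t

    def client_init(api_key: str, base_url: Optional[str] = None, timeout: float = 30.0) -> None: ...

    for param in inspect.signature(client_init).parameters.values():
        t = unwrap_optional(param.annotation)
        required = param.default is inspect.Parameter.empty
        print(param.name, t, 'required' if required else 'optional')
    # api_key <class 'str'> required
    # base_url <class 'str'> optional
    # timeout <class 'float'> optional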
pixeltable/exec/aggregation_node.py CHANGED
@@ -45,7 +45,7 @@ class AggregationNode(ExecNode):
         # we need to make sure to refer to the same exprs that RowBuilder.eval() will use
         self.agg_fn_calls = [cast(exprs.FunctionCall, e) for e in self.agg_fn_eval_ctx.target_exprs]
         # create output_batch here, rather than in __iter__(), so we don't need to remember tbl and row_builder
-        self.output_batch = DataRowBatch(tbl, row_builder, 0)
+        self.output_batch = DataRowBatch(row_builder)
         self.limit = None
 
     def set_limit(self, limit: int) -> None:
pixeltable/exec/cache_prefetch_node.py CHANGED
@@ -12,7 +12,7 @@ from pathlib import Path
 from typing import Any, AsyncIterator, Iterator, Optional
 from uuid import UUID
 
-from pixeltable import catalog, env, exceptions as excs, exprs
+from pixeltable import env, exceptions as excs, exprs
 from pixeltable.utils.filecache import FileCache
 
 from .data_row_batch import DataRowBatch
@@ -37,7 +37,6 @@ class CachePrefetchNode(ExecNode):
     boto_client_lock: threading.Lock
 
     # execution state
-    batch_tbl_version: Optional[catalog.TableVersionHandle]  # needed to construct output batches
     num_returned_rows: int
 
     # ready_rows: rows that are ready to be returned, ordered by row idx;
@@ -68,7 +67,6 @@ class CachePrefetchNode(ExecNode):
         self.boto_client = None
         self.boto_client_lock = threading.Lock()
 
-        self.batch_tbl_version = None
         self.num_returned_rows = 0
         self.ready_rows = deque()
         self.in_flight_rows = {}
@@ -95,7 +93,7 @@ class CachePrefetchNode(ExecNode):
 
         if len(self.ready_rows) > 0:
             # create DataRowBatch from the first BATCH_SIZE ready rows
-            batch = DataRowBatch(self.batch_tbl_version, self.row_builder)
+            batch = DataRowBatch(self.row_builder)
             rows = [self.ready_rows.popleft() for _ in range(min(self.BATCH_SIZE, len(self.ready_rows)))]
             for row in rows:
                 assert row is not None
@@ -173,8 +171,6 @@ class CachePrefetchNode(ExecNode):
         if input_batch is None:
            self.input_finished = True
            return
-        if self.batch_tbl_version is None:
-            self.batch_tbl_version = input_batch.tbl
 
        file_cache = FileCache.get()
 
pixeltable/exec/component_iteration_node.py CHANGED
@@ -40,7 +40,7 @@ class ComponentIterationNode(ExecNode):
     }
 
     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
-        output_batch = DataRowBatch(self.view, self.row_builder)
+        output_batch = DataRowBatch(self.row_builder)
         async for input_batch in self.input:
             for input_row in input_batch:
                 self.row_builder.eval(input_row, self.iterator_args_ctx)
@@ -52,13 +52,14 @@ class ComponentIterationNode(ExecNode):
                 if self.__non_nullable_args_specified(iterator_args):
                     iterator = self.view.get().iterator_cls(**iterator_args)
                     for pos, component_dict in enumerate(iterator):
-                        output_row = output_batch.add_row()
+                        output_row = self.row_builder.make_row()
                         input_row.copy(output_row)
                         # we're expanding the input and need to add the iterator position to the pk
                         self.__populate_output_row(output_row, pos, component_dict)
+                        output_batch.add_row(output_row)
                         if len(output_batch) == self.__OUTPUT_BATCH_SIZE:
                             yield output_batch
-                            output_batch = DataRowBatch(self.view, self.row_builder)
+                            output_batch = DataRowBatch(self.row_builder)
 
         if len(output_batch) > 0:
             yield output_batch
pixeltable/exec/data_row_batch.py CHANGED
@@ -3,8 +3,7 @@ from __future__ import annotations
 import logging
 from typing import Iterator, Optional
 
-from pixeltable import catalog, exprs
-from pixeltable.utils.media_store import MediaStore
+from pixeltable import exprs
 
 _logger = logging.getLogger('pixeltable')
 
@@ -15,51 +14,19 @@ class DataRowBatch:
     Contains the metadata needed to initialize DataRows.
     """
 
-    tbl: Optional[catalog.TableVersionHandle]
     row_builder: exprs.RowBuilder
-    img_slot_idxs: list[int]
-    media_slot_idxs: list[int]  # non-image media slots
-    array_slot_idxs: list[int]
     rows: list[exprs.DataRow]
 
-    def __init__(
-        self,
-        tbl: Optional[catalog.TableVersionHandle],
-        row_builder: exprs.RowBuilder,
-        num_rows: Optional[int] = None,
-        rows: Optional[list[exprs.DataRow]] = None,
-    ):
+    def __init__(self, row_builder: exprs.RowBuilder, rows: Optional[list[exprs.DataRow]] = None):
         """
         Requires either num_rows or rows to be specified, but not both.
         """
-        assert num_rows is None or rows is None
-        self.tbl = tbl
         self.row_builder = row_builder
-        self.img_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_image_type()]
-        # non-image media slots
-        self.media_slot_idxs = [
-            e.slot_idx
-            for e in row_builder.unique_exprs
-            if e.col_type.is_media_type() and not e.col_type.is_image_type()
-        ]
-        self.array_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_array_type()]
-        if rows is not None:
-            self.rows = rows
-        else:
-            if num_rows is None:
-                num_rows = 0
-            self.rows = [
-                exprs.DataRow(
-                    row_builder.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs
-                )
-                for _ in range(num_rows)
-            ]
+        self.rows = [] if rows is None else rows
 
-    def add_row(self, row: Optional[exprs.DataRow] = None) -> exprs.DataRow:
+    def add_row(self, row: Optional[exprs.DataRow]) -> exprs.DataRow:
         if row is None:
-            row = exprs.DataRow(
-                self.row_builder.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs
-            )
+            row = self.row_builder.make_row()
         self.rows.append(row)
         return row
 
@@ -73,28 +40,18 @@ class DataRowBatch:
 
         return self.rows[index]
 
     def flush_imgs(
-        self,
-        idx_range: Optional[slice] = None,
-        stored_img_info: Optional[list[exprs.ColumnSlotIdx]] = None,
-        flushed_slot_idxs: Optional[list[int]] = None,
+        self, idx_range: Optional[slice], stored_img_info: list[exprs.ColumnSlotIdx], flushed_img_slots: list[int]
     ) -> None:
         """Flushes images in the given range of rows."""
-        assert self.tbl is not None
-        if stored_img_info is None:
-            stored_img_info = []
-        if flushed_slot_idxs is None:
-            flushed_slot_idxs = []
-        if len(stored_img_info) == 0 and len(flushed_slot_idxs) == 0:
+        if len(stored_img_info) == 0 and len(flushed_img_slots) == 0:
            return
+
         if idx_range is None:
             idx_range = slice(0, len(self.rows))
         for row in self.rows[idx_range]:
             for info in stored_img_info:
-                col = info.col
-                assert col.tbl.id == self.tbl.id
-                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
-                row.flush_img(info.slot_idx, filepath)
-            for slot_idx in flushed_slot_idxs:
+                row.flush_img(info.slot_idx, info.col)
+            for slot_idx in flushed_img_slots:
                 row.flush_img(slot_idx)
 
     def __iter__(self) -> Iterator[exprs.DataRow]:
pixeltable/exec/expr_eval/expr_eval_node.py CHANGED
@@ -240,7 +240,7 @@ class ExprEvalNode(ExecNode):
                 # make sure we top up our in-flight rows before yielding
                 self._dispatch_input_rows()
                 self._log_state(f'yielding {len(batch_rows)} rows')
-                yield DataRowBatch(tbl=None, row_builder=self.row_builder, rows=batch_rows)
+                yield DataRowBatch(row_builder=self.row_builder, rows=batch_rows)
                 # at this point, we may have more completed rows
 
         assert self.completed_rows.empty()  # all completed rows should be sitting in output_buffer
@@ -254,7 +254,7 @@ class ExprEvalNode(ExecNode):
             batch_rows = self.output_buffer.get_rows(self.output_buffer.num_ready)
             self.num_output_rows += len(batch_rows)
             self._log_state(f'yielding {len(batch_rows)} rows')
-            yield DataRowBatch(tbl=None, row_builder=self.row_builder, rows=batch_rows)
+            yield DataRowBatch(row_builder=self.row_builder, rows=batch_rows)
 
         assert self.output_buffer.num_rows == 0
         return
pixeltable/exec/in_memory_data_node.py CHANGED
@@ -23,7 +23,7 @@ class InMemoryDataNode(ExecNode):
 
     input_rows: list[dict[str, Any]]
     start_row_id: int
-    output_rows: Optional[DataRowBatch]
+    output_batch: Optional[DataRowBatch]
 
     # output_exprs is declared in the superclass, but we redeclare it here with a more specific type
     output_exprs: list[exprs.ColumnRef]
@@ -42,7 +42,7 @@ class InMemoryDataNode(ExecNode):
         self.tbl = tbl
         self.input_rows = rows
         self.start_row_id = start_row_id
-        self.output_rows = None
+        self.output_batch = None
 
     def _open(self) -> None:
         """Create row batch and populate with self.input_rows"""
@@ -56,8 +56,9 @@ class InMemoryDataNode(ExecNode):
         }
         output_slot_idxs = {e.slot_idx for e in self.output_exprs}
 
-        self.output_rows = DataRowBatch(self.tbl, self.row_builder, len(self.input_rows))
-        for row_idx, input_row in enumerate(self.input_rows):
+        self.output_batch = DataRowBatch(self.row_builder)
+        for input_row in self.input_rows:
+            output_row = self.row_builder.make_row()
             # populate the output row with the values provided in the input row
             input_slot_idxs: set[int] = set()
             for col_name, val in input_row.items():
@@ -67,10 +68,10 @@ class InMemoryDataNode(ExecNode):
                 if col.col_type.is_image_type() and isinstance(val, bytes):
                     # this is a literal media file, ie, a sequence of bytes; save it as a binary file and store the path
                     assert col.tbl.id == self.tbl.id
-                    path = MediaStore.save_media_file(val, col.tbl.id, col.id, col.tbl.version)
-                    self.output_rows[row_idx][col_info.slot_idx] = str(path)
+                    filepath, _ = MediaStore.save_media_object(val, col, format=None)
+                    output_row[col_info.slot_idx] = str(filepath)
                 else:
-                    self.output_rows[row_idx][col_info.slot_idx] = val
+                    output_row[col_info.slot_idx] = val
 
                 input_slot_idxs.add(col_info.slot_idx)
 
@@ -79,10 +80,11 @@ class InMemoryDataNode(ExecNode):
             for slot_idx in missing_slot_idxs:
                 col_info = output_cols_by_idx.get(slot_idx)
                 assert col_info is not None
-                self.output_rows[row_idx][col_info.slot_idx] = None
+                output_row[col_info.slot_idx] = None
+            self.output_batch.add_row(output_row)
 
-        self.ctx.num_rows = len(self.output_rows)
+        self.ctx.num_rows = len(self.output_batch)
 
     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
-        _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_rows)} output_rows')
-        yield self.output_rows
+        _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_batch)} rows')
+        yield self.output_batch
pixeltable/exec/sql_node.py CHANGED
@@ -316,8 +316,7 @@ class SqlNode(ExecNode):
             for _ in w:
                 pass
 
-        tbl_version = self.tbl.tbl_version if self.tbl is not None else None
-        output_batch = DataRowBatch(tbl_version, self.row_builder)
+        output_batch = DataRowBatch(self.row_builder)
         output_row: Optional[exprs.DataRow] = None
         num_rows_returned = 0
 
@@ -359,7 +358,7 @@ class SqlNode(ExecNode):
             if self.ctx.batch_size > 0 and len(output_batch) == self.ctx.batch_size:
                 _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
                 yield output_batch
-                output_batch = DataRowBatch(tbl_version, self.row_builder)
+                output_batch = DataRowBatch(self.row_builder)
 
         if len(output_batch) > 0:
             _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
@@ -569,10 +568,10 @@ class SqlSampleNode(SqlNode):
         General SQL form is:
         - MD5(<seed::text> [ + '___' + <rowid_col_val>::text]+
         """
-        sql_expr: sql.ColumnElement = sql.cast(seed, sql.Text)
+        sql_expr: sql.ColumnElement = seed.cast(sql.String)
         for e in sql_cols:
             # Quotes are required below to guarantee that the string is properly presented in SQL
-            sql_expr = sql_expr + sql.literal_column("'___'", sql.Text) + sql.cast(e, sql.Text)
+            sql_expr = sql_expr + sql.literal_column("'___'", sql.Text) + e.cast(sql.String)
         sql_expr = sql.func.md5(sql_expr)
         return sql_expr
 
@@ -591,9 +590,9 @@ class SqlSampleNode(SqlNode):
             s_key = self._create_key_sql(self.input_cte)
 
             # Construct a suitable where clause
-            fraction_sql = sql.cast(SampleClause.fraction_to_md5_hex(float(self.sample_clause.fraction)), sql.Text)
+            fraction_md5 = SampleClause.fraction_to_md5_hex(self.sample_clause.fraction)
             order_by = self._create_key_sql(self.input_cte)
-            return sql.select(*self.input_cte.c).where(s_key < fraction_sql).order_by(order_by)
+            return sql.select(*self.input_cte.c).where(s_key < fraction_md5).order_by(order_by)
 
             return self._create_stmt_stratified_fraction(self.sample_clause.fraction)
         else:
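For intuition on SqlSampleNode's key construction: each row gets a deterministic MD5 key built from the seed and its rowid columns, and a row is selected when that key sorts below a threshold derived from the sampling fraction. A plausible pure-Python model of the mechanism (an illustrative assumption, not pixeltable's actual fraction_to_md5_hex implementation):

    import hashlib

    def sample_key(seed: int, rowid: int) -> str:
        # deterministic per-row key, mirroring MD5(seed || '___' || rowid)
        return hashlib.md5(f'{seed}___{rowid}'.encode()).hexdigest()

    def fraction_to_md5_hex(fraction: float) -> str:
        # the same fraction of the way through the 128-bit key space, as 32 hex digits
        return format(int(fraction * (1 << 128)), '032x')

    # equal-length lowercase hex compares lexicographically like a number,
    # so roughly `fraction` of all keys fall below the threshold
    selected = [i for i in range(10_000) if sample_key(42, i) < fraction_to_md5_hex(0.1)]
    print(len(selected))  # ~1000

Because the key depends only on the seed and the row ids, the same seed always reproduces the same sample.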
pixeltable/exprs/data_row.py CHANGED
@@ -13,7 +13,8 @@ import PIL
 import PIL.Image
 import sqlalchemy as sql
 
-from pixeltable import env
+from pixeltable import catalog, env
+from pixeltable.utils.media_store import MediaStore
 
 
 class DataRow:
@@ -256,23 +257,22 @@ class DataRow:
         self.vals[idx] = val
         self.has_val[idx] = True
 
-    def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
-        """Discard the in-memory value and save it to a local file, if filepath is not None"""
+    def flush_img(self, index: int, col: Optional[catalog.Column] = None) -> None:
+        """Save or discard the in-memory value (required to be a PIL.Image.Image)"""
         if self.vals[index] is None:
             return
         assert self.excs[index] is None
         if self.file_paths[index] is None:
-            if filepath is not None:
+            if col is not None:
                 image = self.vals[index]
-                assert isinstance(image, PIL.Image.Image)
-                # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
-                # In that case, use WebP instead.
-                format = 'webp' if image.has_transparency_data else 'jpeg'
-                if not filepath.endswith(f'.{format}'):
-                    filepath += f'.{format}'
-                self.file_paths[index] = filepath
-                self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
-                image.save(filepath, format=format)
+                format = None
+                if isinstance(image, PIL.Image.Image):
+                    # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
+                    # In that case, use WebP instead.
+                    format = 'webp' if image.has_transparency_data else 'jpeg'
+                filepath, url = MediaStore.save_media_object(image, col, format=format)
+                self.file_paths[index] = str(filepath)
+                self.file_urls[index] = url
             else:
                 # we discard the content of this cell
                 self.has_val[index] = False
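flush_img() now hands the column to MediaStore.save_media_object and only picks a format when the value really is a PIL image, keeping the existing JPEG-vs-WebP rule. That rule in isolation (assumes a Pillow version with has_transparency_data, added in 10.1):

    import PIL.Image

    opaque = PIL.Image.new('RGB', (8, 8), (255, 0, 0))
    translucent = PIL.Image.new('RGBA', (8, 8), (255, 0, 0, 128))

    for img in (opaque, translucent):
        # JPEG can't store an alpha channel, so transparent images go to WebP
        fmt = 'webp' if img.has_transparency_data else 'jpeg'
        print(img.mode, '->', fmt)  # RGB -> jpeg, RGBA -> webp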
pixeltable/exprs/row_builder.py CHANGED
@@ -8,9 +8,8 @@ from uuid import UUID
 
 import numpy as np
 
-from pixeltable import catalog, exceptions as excs, utils
+from pixeltable import catalog, exceptions as excs, exprs, utils
 from pixeltable.env import Env
-from pixeltable.utils.media_store import MediaStore
 
 from .data_row import DataRow
 from .expr import Expr, ExprScope
@@ -85,6 +84,10 @@ class RowBuilder:
     # (a subexpr can be shared across multiple output exprs)
     output_expr_ids: list[set[int]]
 
+    img_slot_idxs: list[int]  # Indices of image slots
+    media_slot_idxs: list[int]  # Indices of non-image media slots
+    array_slot_idxs: list[int]  # Indices of array slots
+
     @dataclass
     class EvalCtx:
         """Context for evaluating a set of target exprs"""
@@ -235,6 +238,12 @@ class RowBuilder:
         for e in self.output_exprs:
             self._record_output_expr_id(e, e.slot_idx)
 
+        self.img_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_image_type()]
+        self.media_slot_idxs = [
+            e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
+        ]
+        self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
+
     def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
         """Record a column that is part of the table row"""
         assert self.tbl is not None
@@ -462,8 +471,7 @@ class RowBuilder:
             else:
                 if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
                     # we have yet to store this image
-                    filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
-                    data_row.flush_img(slot_idx, filepath)
+                    data_row.flush_img(slot_idx, col)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
@@ -489,3 +497,7 @@ class RowBuilder:
                 store_col_names.append(col.col.cellmd_store_name())
 
         return store_col_names, media_cols
+
+    def make_row(self) -> exprs.DataRow:
+        """Creates a new DataRow with the current row_builder's configuration."""
+        return exprs.DataRow(self.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs)
pixeltable/exprs/string_op.py CHANGED
@@ -68,7 +68,7 @@ class StringOp(Expr):
         if self.operator == StringOperator.CONCAT:
             return left.concat(right)
         if self.operator == StringOperator.REPEAT:
-            return sql.func.repeat(sql.cast(left, sql.String), sql.cast(right, sql.Integer))
+            return sql.func.repeat(left.cast(sql.String), right.cast(sql.Integer))
         return None
 
     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
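This hunk, like several above and below, switches from the free function sql.cast(x, T) to SQLAlchemy's equivalent ColumnElement.cast() method. Both compile to the same CAST expression, which a quick standalone check confirms:

    import sqlalchemy as sql

    col = sql.column('n')
    print(sql.cast(col, sql.String))  # CAST(n AS VARCHAR)
    print(col.cast(sql.String))       # CAST(n AS VARCHAR)

The method form reads left to right; SQL behavior is unchanged.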
pixeltable/func/expr_template_function.py CHANGED
@@ -101,13 +101,10 @@ class ExprTemplateFunction(Function):
         return None
 
     def exec(self, args: Sequence[Any], kwargs: dict[str, Any]) -> Any:
-        from pixeltable import exec
-
         assert not self.is_polymorphic
         expr = self.instantiate(args, kwargs)
         row_builder = exprs.RowBuilder(output_exprs=[expr], columns=[], input_exprs=[])
-        row_batch = exec.DataRowBatch(tbl=None, row_builder=row_builder, num_rows=1)
-        row = row_batch[0]
+        row = row_builder.make_row()
         row_builder.eval(row, ctx=row_builder.default_eval_ctx)
         return row[row_builder.get_output_exprs()[0].slot_idx]
 
pixeltable/functions/date.py CHANGED
@@ -83,7 +83,7 @@ def make_date(year: int, month: int, day: int) -> date:
 
 @make_date.to_sql
 def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
-    return sql.func.make_date(sql.cast(year, sql.Integer), sql.cast(month, sql.Integer), sql.cast(day, sql.Integer))
+    return sql.func.make_date(year.cast(sql.Integer), month.cast(sql.Integer), day.cast(sql.Integer))
 
 
 @pxt.udf(is_method=True)
pixeltable/functions/math.py CHANGED
@@ -97,7 +97,7 @@ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sq
     if digits is None:
         return sql.func.round(self)
     else:
-        return sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer))
+        return sql.func.round(self.cast(sql.Numeric), digits.cast(sql.Integer))
 
 
 @pxt.udf(is_method=True)
pixeltable/functions/openai.py CHANGED
@@ -31,11 +31,15 @@ _logger = logging.getLogger('pixeltable')
 
 
 @env.register_client('openai')
-def _(api_key: str) -> 'openai.AsyncOpenAI':
+def _(api_key: str, base_url: Optional[str] = None, api_version: Optional[str] = None) -> 'openai.AsyncOpenAI':
     import openai
 
+    default_query = None if api_version is None else {'api-version': api_version}
+
     return openai.AsyncOpenAI(
         api_key=api_key,
+        base_url=base_url,
+        default_query=default_query,
         # recommended to increase limits for async client to avoid connection errors
         http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
     )
@@ -124,7 +128,7 @@ _header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d
 def _parse_header_duration(duration_str: str) -> datetime.timedelta:
     match = _header_duration_pattern.match(duration_str)
     if not match:
-        raise ValueError('Invalid duration format')
+        raise ValueError(f'Invalid duration format: {duration_str}')
 
     days = int(match.group(1) or 0)
     hours = int(match.group(2) or 0)
@@ -147,7 +151,7 @@ def _get_header_info(
     requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
     requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
     requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-    requests_reset_str = headers.get('x-ratelimit-reset-requests')
+    requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s')  # Default to 5 seconds
     requests_reset_ts = now + _parse_header_duration(requests_reset_str)
     requests_info = (requests_limit, requests_remaining, requests_reset_ts)
 
@@ -157,7 +161,7 @@ def _get_header_info(
     tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
     tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
     tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
-    tokens_reset_str = headers.get('x-ratelimit-reset-tokens')
+    tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s')  # Default to 5 seconds
     tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
     tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
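Combined with the env.py changes above, both new OpenAI client parameters default to None, so existing setups keep working; when set, base_url points the client at a custom endpoint and api_version is sent as a default 'api-version' query parameter (the convention used by Azure OpenAI-style gateways). Following the lookup convention spelled out in Env's error message, they should be configurable like any other client parameter (values below are placeholders):

    import os

    # via environment variables ...
    os.environ['OPENAI_API_KEY'] = 'sk-...'
    os.environ['OPENAI_BASE_URL'] = 'https://my-gateway.example.com/v1'
    os.environ['OPENAI_API_VERSION'] = '2024-06-01'
    # ... or via api_key/base_url/api_version keys in the [openai] section
    # of $PIXELTABLE_HOME/config.toml

    import pixeltable as pxt  # the client is constructed lazily, on first use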