PyPI - pixeltable - Versions diffs - 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl - Mend

pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (68)

pixeltable/__init__.py +4 -0
pixeltable/catalog/catalog.py +125 -63
pixeltable/catalog/column.py +7 -2
pixeltable/catalog/table.py +1 -0
pixeltable/catalog/table_metadata.py +4 -0
pixeltable/catalog/table_version.py +174 -117
pixeltable/catalog/table_version_handle.py +4 -1
pixeltable/catalog/table_version_path.py +0 -11
pixeltable/catalog/view.py +6 -0
pixeltable/config.py +7 -0
pixeltable/dataframe.py +10 -5
pixeltable/env.py +56 -19
pixeltable/exec/__init__.py +2 -0
pixeltable/exec/cell_materialization_node.py +231 -0
pixeltable/exec/cell_reconstruction_node.py +135 -0
pixeltable/exec/exec_node.py +1 -1
pixeltable/exec/expr_eval/evaluators.py +1 -0
pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
pixeltable/exec/expr_eval/globals.py +2 -0
pixeltable/exec/globals.py +32 -0
pixeltable/exec/object_store_save_node.py +1 -4
pixeltable/exec/row_update_node.py +16 -9
pixeltable/exec/sql_node.py +107 -14
pixeltable/exprs/__init__.py +1 -1
pixeltable/exprs/arithmetic_expr.py +23 -18
pixeltable/exprs/column_property_ref.py +10 -10
pixeltable/exprs/column_ref.py +2 -2
pixeltable/exprs/data_row.py +106 -37
pixeltable/exprs/expr.py +9 -0
pixeltable/exprs/expr_set.py +14 -7
pixeltable/exprs/inline_expr.py +2 -19
pixeltable/exprs/json_path.py +45 -12
pixeltable/exprs/row_builder.py +54 -22
pixeltable/functions/__init__.py +1 -0
pixeltable/functions/bedrock.py +7 -0
pixeltable/functions/deepseek.py +11 -4
pixeltable/functions/llama_cpp.py +7 -0
pixeltable/functions/math.py +1 -1
pixeltable/functions/ollama.py +7 -0
pixeltable/functions/openai.py +4 -4
pixeltable/functions/openrouter.py +143 -0
pixeltable/functions/video.py +110 -28
pixeltable/globals.py +10 -4
pixeltable/io/globals.py +18 -17
pixeltable/io/parquet.py +1 -1
pixeltable/io/table_data_conduit.py +47 -22
pixeltable/iterators/document.py +61 -23
pixeltable/iterators/video.py +126 -53
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_40.py +73 -0
pixeltable/metadata/notes.py +1 -0
pixeltable/plan.py +175 -46
pixeltable/share/packager.py +155 -26
pixeltable/store.py +2 -3
pixeltable/type_system.py +5 -3
pixeltable/utils/arrow.py +6 -6
pixeltable/utils/av.py +65 -0
pixeltable/utils/console_output.py +4 -1
pixeltable/utils/exception_handler.py +5 -28
pixeltable/utils/image.py +7 -0
pixeltable/utils/misc.py +5 -0
pixeltable/utils/object_stores.py +16 -1
pixeltable/utils/s3_store.py +44 -11
{pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
{pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
{pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
{pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
{pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0

pixeltable/exprs/data_row.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import dataclasses
 import datetime
 import io
 import urllib.parse
@@ -13,15 +14,72 @@ import PIL
 import PIL.Image
 import sqlalchemy as sql
+import pixeltable.utils.image as image_utils
 from pixeltable import catalog, env
 from pixeltable.utils.local_store import TempStore
+from pixeltable.utils.misc import non_none_dict_factory
+@dataclasses.dataclass
+class ArrayMd:
+    """
+    Metadata for array cells that are stored externally.
+    """
+    start: int
+    end: int
+    # we store bool arrays as packed bits (uint8 arrays), and need to record the shape to reconstruct the array
+    is_bool: bool = False
+    shape: tuple[int, ...] | None = None
+    def as_dict(self) -> dict:
+        # dict_factory: suppress Nones
+        x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
+        return x
+@dataclasses.dataclass
+class CellMd:
+    """
+    Content of the cellmd column.
+    All fields are optional, to minimize storage.
+    """
+    errortype: str | None = None
+    errormsg: str | None = None
+    # a list of file urls that are used to store images and arrays; only set for json and array columns
+    # for json columns: a list of all urls referenced in the column value
+    # for array columns: a single url
+    file_urls: list[str] | None = None
+    array_md: ArrayMd | None = None
+    @classmethod
+    def from_dict(cls, d: dict) -> CellMd:
+        x: CellMd
+        if 'array_md' in d:
+            d2 = d.copy()
+            del d2['array_md']
+            x = cls(**d2, array_md=ArrayMd(**d['array_md']))
+        else:
+            x = cls(**d)
+        return x
+    def as_dict(self) -> dict:
+        x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
+        return x
 class DataRow:
     """
     Encapsulates all data and execution state needed by RowBuilder and DataRowBatch:
     - state for in-memory computation
-    - state for storing the data
+    - state needed for expression evaluation
+    - containers for output column values
     This is not meant to be a black-box abstraction.
     In-memory representations by column type:
@@ -39,79 +97,92 @@ class DataRow:
     - DocumentType: local path if available, otherwise url
     """
+    # expr evaluation state; indexed by slot idx
     vals: np.ndarray  # of object
     has_val: np.ndarray  # of bool
     excs: np.ndarray  # of object
-    # If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
-    # exception handling under normal operation.
-    _may_have_exc: bool
-    # expr evaluation state; indexed by slot idx
     missing_slots: np.ndarray  # of bool; number of missing dependencies
     missing_dependents: np.ndarray  # of int16; number of missing dependents
     is_scheduled: np.ndarray  # of bool; True if this slot is scheduled for evaluation
-    # control structures that are shared across all DataRows in a batch
-    img_slot_idxs: list[int]
-    media_slot_idxs: list[int]
-    array_slot_idxs: list[int]
-    # the primary key of a store row is a sequence of ints (the number is different for table vs view)
-    pk: Optional[tuple[int, ...]]
+    # CellMd needed for query execution; needs to be indexed by slot idx, not column id, to work for joins
+    slot_md: dict[int, CellMd]
     # file_urls:
     # - stored url of file for media in vals[i]
     # - None if vals[i] is not media type
     # - not None if file_paths[i] is not None
+    # TODO: this is a sparse vector; should it be a dict[int, str]?
     file_urls: np.ndarray  # of str
     # file_paths:
     # - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
     # - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
+    # TODO: this is a sparse vector; should it be a dict[int, str]?
     file_paths: np.ndarray  # of str
+    # If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
+    # exception handling under normal operation.
+    _may_have_exc: bool
+    # the primary key of a store row is a sequence of ints (the number is different for table vs view)
+    pk: Optional[tuple[int, ...]]
     # for nested rows (ie, those produced by JsonMapperDispatcher)
     parent_row: Optional[DataRow]
     parent_slot_idx: Optional[int]
+    # state for table output (insert()/update()); key: column id
+    cell_vals: dict[int, Any]  # materialized values of output columns, in the format required for the column
+    cell_md: dict[int, CellMd]
+    # control structures that are shared across all DataRows in a batch
+    img_slot_idxs: list[int]
+    media_slot_idxs: list[int]
+    array_slot_idxs: list[int]
+    json_slot_idxs: list[int]
     def __init__(
         self,
         size: int,
         img_slot_idxs: list[int],
         media_slot_idxs: list[int],
         array_slot_idxs: list[int],
+        json_slot_idxs: list[int],
         parent_row: Optional[DataRow] = None,
         parent_slot_idx: Optional[int] = None,
     ):
-        self.img_slot_idxs = img_slot_idxs
-        self.media_slot_idxs = media_slot_idxs
-        self.array_slot_idxs = array_slot_idxs
         self.init(size)
         self.parent_row = parent_row
         self.parent_slot_idx = parent_slot_idx
-    def init(self, num_slots: int) -> None:
-        self.vals = np.full(num_slots, None, dtype=object)
-        self.has_val = np.zeros(num_slots, dtype=bool)
-        self.excs = np.full(num_slots, None, dtype=object)
+        self.img_slot_idxs = img_slot_idxs
+        self.media_slot_idxs = media_slot_idxs
+        self.array_slot_idxs = array_slot_idxs
+        self.json_slot_idxs = json_slot_idxs
+    def init(self, size: int) -> None:
+        self.vals = np.full(size, None, dtype=object)
+        self.has_val = np.zeros(size, dtype=bool)
+        self.excs = np.full(size, None, dtype=object)
+        self.missing_slots = np.zeros(size, dtype=bool)
+        self.missing_dependents = np.zeros(size, dtype=np.int16)
+        self.is_scheduled = np.zeros(size, dtype=bool)
+        self.slot_md = {}
+        self.file_urls = np.full(size, None, dtype=object)
+        self.file_paths = np.full(size, None, dtype=object)
         self._may_have_exc = False
-        self.missing_slots = np.zeros(num_slots, dtype=bool)
-        self.missing_dependents = np.zeros(num_slots, dtype=np.int16)
-        self.is_scheduled = np.zeros(num_slots, dtype=bool)
+        self.cell_vals = {}
+        self.cell_md = {}
         self.pk = None
-        self.file_urls = np.full(num_slots, None, dtype=object)
-        self.file_paths = np.full(num_slots, None, dtype=object)
         self.parent_row = None
         self.parent_slot_idx = None
-    def clear(self, idxs: Optional[np.ndarray] = None) -> None:
-        if idxs is not None:
-            self.has_val[idxs] = False
-            self.vals[idxs] = None
-            self.excs[idxs] = None
-            self.file_urls[idxs] = None
-            self.file_paths[idxs] = None
+    def clear(self, slot_idxs: Optional[np.ndarray] = None) -> None:
+        if slot_idxs is not None:
+            self.has_val[slot_idxs] = False
+            self.vals[slot_idxs] = None
+            self.excs[slot_idxs] = None
+            self.file_urls[slot_idxs] = None
+            self.file_paths[slot_idxs] = None
         else:
             self.init(len(self.vals))
@@ -292,9 +363,7 @@ class DataRow:
         val = self.vals[index]
         format = None
         if isinstance(val, PIL.Image.Image):
-            # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
-            # In that case, use WebP instead.
-            format = 'webp' if val.has_transparency_data else 'jpeg'
+            format = image_utils.default_format(val)
         filepath, url = TempStore.save_media_object(val, col, format=format)
         self.file_paths[index] = str(filepath) if filepath is not None else None
         self.vals[index] = None

pixeltable/exprs/expr.py CHANGED Viewed

@@ -368,6 +368,15 @@ class Expr(abc.ABC):
         for e in expr_list:
             yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
+    @classmethod
+    def list_contains(
+        cls,
+        expr_list: Iterable[Expr],
+        expr_class: type[Expr] | None = None,
+        filter: Callable[[Expr], bool] | None = None,
+    ) -> bool:
+        return any(e._contains(expr_class, filter) for e in expr_list)
     def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
         """
         Returns True if any subexpr is an instance of cls and/or matches filter.

pixeltable/exprs/expr_set.py CHANGED Viewed

@@ -9,26 +9,33 @@ T = TypeVar('T', bound='Expr')
 class ExprSet(Generic[T]):
     """
-    A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
+    An ordered set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by
+    Expr.id.
     """
     exprs: dict[int, T]  # key: Expr.id
+    expr_offsets: dict[int, int]  # key: Expr.id, value: offset into self.exprs.keys()
     exprs_by_idx: dict[int, T]  # key: slot_idx
     def __init__(self, elements: Optional[Iterable[T]] = None):
         self.exprs = {}
+        self.expr_offsets = {}
         self.exprs_by_idx = {}
         if elements is not None:
             for e in elements:
                 self.add(e)
-    def add(self, expr: T) -> None:
-        if expr.id in self.exprs:
-            return
+    def add(self, expr: T) -> int:
+        """Returns offset corresponding to iteration order"""
+        offset = self.expr_offsets.get(expr.id)
+        if offset is not None:
+            return offset
+        offset = len(self.exprs)
         self.exprs[expr.id] = expr
-        if expr.slot_idx is None:
-            return
-        self.exprs_by_idx[expr.slot_idx] = expr
+        self.expr_offsets[expr.id] = offset
+        if expr.slot_idx is not None:
+            self.exprs_by_idx[expr.slot_idx] = expr
+        return offset
     def update(self, *others: Iterable[T]) -> None:
         for other in others:

pixeltable/exprs/inline_expr.py CHANGED Viewed

@@ -98,13 +98,7 @@ class InlineList(Expr):
     def __init__(self, elements: Iterable):
         exprs = [Expr.from_object(el) for el in elements]
-        json_schema = {
-            'type': 'array',
-            'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
-            'items': False,  # No additional items (fixed length)
-        }
-        super().__init__(ts.JsonType(json_schema))
+        super().__init__(ts.JsonType())
         self.components.extend(exprs)
         self.id = self._create_id()
@@ -150,18 +144,7 @@ class InlineDict(Expr):
             self.keys.append(key)
             exprs.append(Expr.from_object(val))
-        json_schema: Optional[dict[str, Any]]
-        try:
-            json_schema = {
-                'type': 'object',
-                'properties': {key: expr.col_type.to_json_schema() for key, expr in zip(self.keys, exprs)},
-            }
-        except excs.Error:
-            # InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
-            # so we can't always construct a valid schema.
-            json_schema = None
-        super().__init__(ts.JsonType(json_schema))
+        super().__init__(ts.JsonType())
         self.components.extend(exprs)
         self.id = self._create_id()

pixeltable/exprs/json_path.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from __future__ import annotations
+import io
+from pathlib import Path
 from typing import Any, Optional
 import jmespath
@@ -7,6 +9,7 @@ import sqlalchemy as sql
 from pixeltable import catalog, exceptions as excs, type_system as ts
+from .column_ref import ColumnRef
 from .data_row import DataRow
 from .expr import Expr
 from .globals import print_slice
@@ -23,6 +26,11 @@ class JsonPath(Expr):
     (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
     """
+    path_elements: list[str | int | slice]
+    compiled_path: jmespath.parser.ParsedResult | None
+    scope_idx: int
+    file_handles: dict[Path, io.BufferedReader]  # key: file path
     def __init__(
         self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
     ) -> None:
@@ -31,16 +39,22 @@ class JsonPath(Expr):
         super().__init__(ts.JsonType(nullable=True))  # JsonPath expressions are always nullable
         if anchor is not None:
             self.components = [anchor]
-        self.path_elements: list[str | int | slice] = path_elements
+        self.path_elements = path_elements
         self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
         self.scope_idx = scope_idx
         # NOTE: the _create_id() result will change if set_anchor() gets called;
         # this is not a problem, because _create_id() shouldn't be called after init()
         self.id = self._create_id()
+        self.file_handles = {}
+    def release(self) -> None:
+        for fh in self.file_handles.values():
+            fh.close()
+        self.file_handles.clear()
     def __repr__(self) -> str:
         # else 'R': the anchor is RELATIVE_PATH_ROOT
-        anchor_str = str(self._anchor) if self._anchor is not None else 'R'
+        anchor_str = str(self.anchor) if self.anchor is not None else 'R'
         if len(self.path_elements) == 0:
             return anchor_str
         return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
@@ -67,7 +81,7 @@ class JsonPath(Expr):
         return cls(anchor, path_elements, d['scope_idx'])
     @property
-    def _anchor(self) -> Optional[Expr]:
+    def anchor(self) -> Optional[Expr]:
         return None if len(self.components) == 0 else self.components[0]
     def set_anchor(self, anchor: Expr) -> None:
@@ -75,7 +89,7 @@ class JsonPath(Expr):
         self.components = [anchor]
     def is_relative_path(self) -> bool:
-        return self._anchor is None
+        return self.anchor is None
     def _has_relative_path(self) -> bool:
         return self.is_relative_path() or super()._has_relative_path()
@@ -85,7 +99,7 @@ class JsonPath(Expr):
             # TODO: take scope_idx into account
             self.set_anchor(mapper.scope_anchor)
         else:
-            self._anchor._bind_rel_paths(mapper)
+            self.anchor._bind_rel_paths(mapper)
     def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
         """
@@ -99,15 +113,15 @@ class JsonPath(Expr):
     def __getattr__(self, name: str) -> 'JsonPath':
         assert isinstance(name, str)
-        return JsonPath(self._anchor, [*self.path_elements, name])
+        return JsonPath(self.anchor, [*self.path_elements, name])
     def __getitem__(self, index: object) -> 'JsonPath':
         if isinstance(index, (int, slice, str)):
-            return JsonPath(self._anchor, [*self.path_elements, index])
+            return JsonPath(self.anchor, [*self.path_elements, index])
         raise excs.Error(f'Invalid json list index: {index}')
     def default_column_name(self) -> Optional[str]:
-        anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
+        anchor_name = self.anchor.default_column_name() if self.anchor is not None else ''
         ret_name = f'{anchor_name}.{self._json_path()}'
         def cleanup_char(s: str) -> str:
@@ -159,12 +173,31 @@ class JsonPath(Expr):
                 result.append(f'[{print_slice(element)}]')
         return ''.join(result)
-    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
-        assert self._anchor is not None, self
-        val = data_row[self._anchor.slot_idx]
+    def eval(self, row: DataRow, row_builder: RowBuilder) -> None:
+        assert self.anchor is not None, self
+        val = row[self.anchor.slot_idx]
         if self.compiled_path is not None:
             val = self.compiled_path.search(val)
-        data_row[self.slot_idx] = val
+        row[self.slot_idx] = val
+        if val is None or self.anchor is None or not isinstance(self.anchor, ColumnRef):
+            return
+        # the origin of val is a json-typed column, which might stored inlined objects
+        if self.anchor.slot_idx not in row.slot_md:
+            # we can infer that there aren't any inlined objects because our execution plan doesn't include
+            # materializing the cellmd (eg, insert plans)
+            # TODO: have the planner pass that fact into ExprEvalNode explicitly to streamline this path a bit more
+            return
+        # defer import until it's needed
+        from pixeltable.exec.cell_reconstruction_node import json_has_inlined_objs, reconstruct_json
+        cell_md = row.slot_md[self.anchor.slot_idx]
+        if cell_md is None or cell_md.file_urls is None or not json_has_inlined_objs(val):
+            # val doesn't contain inlined objects
+            return
+        row.vals[self.slot_idx] = reconstruct_json(val, cell_md.file_urls, self.file_handles)
 RELATIVE_PATH_ROOT = JsonPath(None)

pixeltable/exprs/row_builder.py CHANGED Viewed

@@ -1,15 +1,17 @@
 from __future__ import annotations
+import dataclasses
 import sys
 import time
-from dataclasses import dataclass
 from typing import Any, Iterable, NamedTuple, Optional, Sequence
 from uuid import UUID
 import numpy as np
+import sqlalchemy as sql
 from pixeltable import catalog, exceptions as excs, exprs, utils
 from pixeltable.env import Env
+from pixeltable.utils.misc import non_none_dict_factory
 from .data_row import DataRow
 from .expr import Expr, ExprScope
@@ -68,7 +70,7 @@ class RowBuilder:
     input_exprs: ExprSet
     tbl: Optional[catalog.TableVersion]  # reference table of the RowBuilder; used to identify pk columns for writes
-    table_columns: list[ColumnSlotIdx]
+    table_columns: dict[catalog.Column, int | None]  # value: slot idx, if the result of an expr
     default_eval_ctx: EvalCtx
     unstored_iter_args: dict[UUID, Expr]
@@ -92,10 +94,9 @@ class RowBuilder:
     img_slot_idxs: list[int]  # Indices of image slots
     media_slot_idxs: list[int]  # Indices of non-image media slots
     array_slot_idxs: list[int]  # Indices of array slots
-    stored_img_cols: list[exprs.ColumnSlotIdx]
-    stored_media_cols: list[exprs.ColumnSlotIdx]
+    json_slot_idxs: list[int]  # Indices of json slots
-    @dataclass
+    @dataclasses.dataclass
     class EvalCtx:
         """Context for evaluating a set of target exprs"""
@@ -113,8 +114,6 @@ class RowBuilder:
     ):
         self.unique_exprs: ExprSet[Expr] = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
-        self.stored_img_cols = []
-        self.stored_media_cols = []
         # record input and output exprs; make copies to avoid reusing execution state
         unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
@@ -138,7 +137,7 @@ class RowBuilder:
         from .column_ref import ColumnRef
         self.tbl = tbl
-        self.table_columns: list[ColumnSlotIdx] = []
+        self.table_columns = {}
         self.input_exprs = ExprSet()
         validating_colrefs: dict[Expr, Expr] = {}  # key: non-validating colref, value: corresp. validating colref
         for col in columns:
@@ -245,17 +244,27 @@ class RowBuilder:
             e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
         ]
         self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
+        self.json_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_json_type()]
     def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
-        """Record a column that is part of the table row"""
+        """Record an output column for which the value is produced via expr evaluation"""
         assert self.tbl is not None
         assert col.is_stored
-        info = ColumnSlotIdx(col, slot_idx)
-        self.table_columns.append(info)
-        if col.col_type.is_media_type():
-            self.stored_media_cols.append(info)
-            if col.col_type.is_image_type():
-                self.stored_img_cols.append(info)
+        self.table_columns[col] = slot_idx
+    def add_table_columns(self, cols: list[catalog.Column]) -> None:
+        """Record output columns whose values are materialized into DataRow.cell_vals"""
+        for col in cols:
+            self.table_columns[col] = None
+    @property
+    def media_output_col_info(self) -> list[ColumnSlotIdx]:
+        """Return slot idxs for media output columns whose values are produced by expr evaluation"""
+        return [
+            ColumnSlotIdx(col, slot_idx)
+            for col, slot_idx in self.table_columns.items()
+            if col.col_type.is_media_type() and slot_idx is not None
+        ]
     @property
     def num_materialized(self) -> int:
@@ -462,13 +471,30 @@ class RowBuilder:
         num_excs = 0
         table_row: list[Any] = list(pk)
-        for col, slot_idx in self.table_columns:
+        # Nulls in JSONB columns need to be stored as sql.sql.null(), otherwise it stores a json 'null'
+        for col, slot_idx in self.table_columns.items():
+            if col.id in data_row.cell_vals:
+                table_row.append(data_row.cell_vals[col.id])
+                if col.stores_cellmd:
+                    if data_row.cell_md[col.id] is None:
+                        table_row.append(sql.sql.null())
+                    else:
+                        # we want to minimize the size of the stored dict and use dict_factory to remove Nones
+                        md = dataclasses.asdict(data_row.cell_md[col.id], dict_factory=non_none_dict_factory)
+                        assert len(md) > 0
+                        table_row.append(md)
+                if slot_idx is not None and data_row.has_exc(slot_idx):
+                    num_excs += 1
+                    if cols_with_excs is not None:
+                        cols_with_excs.add(col.id)
+                continue
             if data_row.has_exc(slot_idx):
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
                 if cols_with_excs is not None:
                     cols_with_excs.add(col.id)
-                table_row.append(None)
+                table_row.append(sql.sql.null() if col.col_type.is_json_type() else None)
                 if col.stores_cellmd:
                     # exceptions get stored in the errortype/-msg properties of the cellmd column
                     table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
@@ -476,7 +502,7 @@ class RowBuilder:
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
-                    table_row.append(None)  # placeholder for cellmd column
+                    table_row.append(sql.sql.null())  # placeholder for cellmd column
         return table_row, num_excs
@@ -490,12 +516,18 @@ class RowBuilder:
         store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
         for col in self.table_columns:
-            store_col_names.append(col.col.store_name())
-            if col.col.stores_cellmd:
-                store_col_names.append(col.col.cellmd_store_name())
+            store_col_names.append(col.store_name())
+            if col.stores_cellmd:
+                store_col_names.append(col.cellmd_store_name())
         return store_col_names
     def make_row(self) -> exprs.DataRow:
         """Creates a new DataRow with the current row_builder's configuration."""
-        return exprs.DataRow(self.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs)
+        return exprs.DataRow(
+            size=self.num_materialized,
+            img_slot_idxs=self.img_slot_idxs,
+            media_slot_idxs=self.media_slot_idxs,
+            array_slot_idxs=self.array_slot_idxs,
+            json_slot_idxs=self.json_slot_idxs,
+        )

pixeltable/functions/__init__.py CHANGED Viewed

@@ -19,6 +19,7 @@ from . import (
     mistralai,
     ollama,
     openai,
+    openrouter,
     replicate,
     string,
     timestamp,

pixeltable/functions/bedrock.py CHANGED Viewed

@@ -1,3 +1,10 @@
+"""
+Pixeltable UDFs for AWS Bedrock AI models.
+Provides integration with AWS Bedrock for accessing various foundation models
+including Anthropic Claude, Amazon Titan, and other providers.
+"""
 import logging
 from typing import TYPE_CHECKING, Any, Optional

pixeltable/functions/deepseek.py CHANGED Viewed

@@ -1,3 +1,10 @@
+"""
+Pixeltable UDFs for Deepseek AI models.
+Provides integration with Deepseek's language models for chat completions
+and other AI capabilities.
+"""
 import json
 from typing import TYPE_CHECKING, Any, Optional
@@ -67,10 +74,10 @@ async def chat_completions(
         of the table `tbl`:
         >>> messages = [
-                {'role': 'system', 'content': 'You are a helpful assistant.'},
-                {'role': 'user', 'content': tbl.prompt}
-            ]
-            tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
+        ...     {'role': 'system', 'content': 'You are a helpful assistant.'},
+        ...     {'role': 'user', 'content': tbl.prompt}
+        ... ]
+        >>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
     """
     if model_kwargs is None:
         model_kwargs = {}

pixeltable/functions/llama_cpp.py CHANGED Viewed

@@ -1,3 +1,10 @@
+"""
+Pixeltable UDFs for llama.cpp models.
+Provides integration with llama.cpp for running quantized language models locally,
+supporting chat completions and embeddings with GGUF format models.
+"""
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Optional

pixeltable/functions/math.py CHANGED Viewed

@@ -97,7 +97,7 @@ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sq
     if digits is None:
         return sql.func.round(self)
     else:
-        return sql.func.round(self.cast(sql.Numeric), digits.cast(sql.Integer))
+        return sql.cast(sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer)), sql.Float)
 @pxt.udf(is_method=True)

pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl