PyPI - pixeltable - Versions diffs - 0.4.0rc1__py3-none-any.whl → 0.4.0rc3__py3-none-any.whl - Mend

pixeltable 0.4.0rc1__py3-none-any.whl → 0.4.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (37)

pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +4 -0
pixeltable/catalog/table.py +16 -0
pixeltable/catalog/table_version.py +17 -2
pixeltable/catalog/view.py +24 -1
pixeltable/dataframe.py +185 -9
pixeltable/env.py +2 -0
pixeltable/exec/__init__.py +1 -1
pixeltable/exec/expr_eval/evaluators.py +4 -1
pixeltable/exec/sql_node.py +152 -12
pixeltable/exprs/data_row.py +5 -3
pixeltable/exprs/expr.py +7 -0
pixeltable/exprs/literal.py +2 -0
pixeltable/func/tools.py +1 -1
pixeltable/functions/anthropic.py +19 -45
pixeltable/functions/deepseek.py +19 -38
pixeltable/functions/fireworks.py +9 -18
pixeltable/functions/gemini.py +2 -3
pixeltable/functions/llama_cpp.py +6 -6
pixeltable/functions/mistralai.py +15 -41
pixeltable/functions/ollama.py +1 -1
pixeltable/functions/openai.py +82 -165
pixeltable/functions/together.py +22 -80
pixeltable/globals.py +5 -0
pixeltable/metadata/__init__.py +11 -2
pixeltable/metadata/converters/convert_36.py +38 -0
pixeltable/metadata/notes.py +1 -0
pixeltable/metadata/schema.py +3 -0
pixeltable/plan.py +217 -10
pixeltable/share/packager.py +115 -6
pixeltable/utils/formatter.py +64 -42
pixeltable/utils/sample.py +25 -0
{pixeltable-0.4.0rc1.dist-info → pixeltable-0.4.0rc3.dist-info}/METADATA +2 -1
{pixeltable-0.4.0rc1.dist-info → pixeltable-0.4.0rc3.dist-info}/RECORD +37 -35
{pixeltable-0.4.0rc1.dist-info → pixeltable-0.4.0rc3.dist-info}/LICENSE +0 -0
{pixeltable-0.4.0rc1.dist-info → pixeltable-0.4.0rc3.dist-info}/WHEEL +0 -0
{pixeltable-0.4.0rc1.dist-info → pixeltable-0.4.0rc3.dist-info}/entry_points.txt +0 -0

pixeltable/functions/together.py CHANGED Viewed

@@ -7,7 +7,7 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
 import base64
 import io
-from typing import TYPE_CHECKING, Callable, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
 import numpy as np
 import PIL.Image
@@ -50,21 +50,7 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
 @pxt.udf(resource_pool='request-rate:together:chat')
-async def completions(
-    prompt: str,
-    *,
-    model: str,
-    max_tokens: Optional[int] = None,
-    stop: Optional[list] = None,
-    temperature: Optional[float] = None,
-    top_p: Optional[float] = None,
-    top_k: Optional[int] = None,
-    repetition_penalty: Optional[float] = None,
-    logprobs: Optional[int] = None,
-    echo: Optional[bool] = None,
-    n: Optional[int] = None,
-    safety_model: Optional[str] = None,
-) -> dict:
+async def completions(prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None) -> dict:
     """
     Generate completions based on a given prompt using a specified model.
@@ -82,8 +68,8 @@ async def completions(
     Args:
         prompt: A string providing context for the model to complete.
         model: The name of the model to query.
-    For details on the other parameters, see: <https://docs.together.ai/reference/completions-1>
+        model_kwargs: Additional keyword arguments for the Together `completions` API.
+            For details on the available parameters, see: <https://docs.together.ai/reference/completions-1>
     Returns:
         A dictionary containing the response and other metadata.
@@ -94,41 +80,16 @@ async def completions(
         >>> tbl.add_computed_column(response=completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1'))
     """
-    result = await _together_client().completions.create(
-        prompt=prompt,
-        model=model,
-        max_tokens=max_tokens,
-        stop=stop,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=repetition_penalty,
-        logprobs=logprobs,
-        echo=echo,
-        n=n,
-        safety_model=safety_model,
-    )
+    if model_kwargs is None:
+        model_kwargs = {}
+    result = await _together_client().completions.create(prompt=prompt, model=model, **model_kwargs)
     return result.dict()
 @pxt.udf(resource_pool='request-rate:together:chat')
 async def chat_completions(
-    messages: list[dict[str, str]],
-    *,
-    model: str,
-    max_tokens: Optional[int] = None,
-    stop: Optional[list[str]] = None,
-    temperature: Optional[float] = None,
-    top_p: Optional[float] = None,
-    top_k: Optional[int] = None,
-    repetition_penalty: Optional[float] = None,
-    logprobs: Optional[int] = None,
-    echo: Optional[bool] = None,
-    n: Optional[int] = None,
-    safety_model: Optional[str] = None,
-    response_format: Optional[dict] = None,
-    tools: Optional[dict] = None,
-    tool_choice: Optional[dict] = None,
+    messages: list[dict[str, str]], *, model: str, model_kwargs: Optional[dict[str, Any]] = None
 ) -> dict:
     """
     Generate chat completions based on a given prompt using a specified model.
@@ -147,8 +108,8 @@ async def chat_completions(
     Args:
         messages: A list of messages comprising the conversation so far.
         model: The name of the model to query.
-    For details on the other parameters, see: <https://docs.together.ai/reference/chat-completions-1>
+        model_kwargs: Additional keyword arguments for the Together `chat/completions` API.
+            For details on the available parameters, see: <https://docs.together.ai/reference/chat-completions-1>
     Returns:
         A dictionary containing the response and other metadata.
@@ -160,23 +121,10 @@ async def chat_completions(
         >>> messages = [{'role': 'user', 'content': tbl.prompt}]
         ... tbl.add_computed_column(response=chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1'))
     """
-    result = await _together_client().chat.completions.create(
-        messages=messages,
-        model=model,
-        max_tokens=max_tokens,
-        stop=stop,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=repetition_penalty,
-        logprobs=logprobs,
-        echo=echo,
-        n=n,
-        safety_model=safety_model,
-        response_format=response_format,
-        tools=tools,
-        tool_choice=tool_choice,
-    )
+    if model_kwargs is None:
+        model_kwargs = {}
+    result = await _together_client().chat.completions.create(messages=messages, model=model, **model_kwargs)
     return result.dict()
@@ -236,14 +184,7 @@ def _(model: str) -> ts.ArrayType:
 @pxt.udf(resource_pool='request-rate:together:images')
 async def image_generations(
-    prompt: str,
-    *,
-    model: str,
-    steps: Optional[int] = None,
-    seed: Optional[int] = None,
-    height: Optional[int] = None,
-    width: Optional[int] = None,
-    negative_prompt: Optional[str] = None,
+    prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None
 ) -> PIL.Image.Image:
     """
     Generate images based on a given prompt using a specified model.
@@ -262,8 +203,8 @@ async def image_generations(
     Args:
         prompt: A description of the desired images.
         model: The model to use for image generation.
-    For details on the other parameters, see: <https://docs.together.ai/reference/post_images-generations>
+        model_kwargs: Additional keyword args for the Together `images/generations` API.
+            For details on the available parameters, see: <https://docs.together.ai/reference/post_images-generations>
     Returns:
         The generated image.
@@ -276,9 +217,10 @@ async def image_generations(
         ...     response=image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
         ... )
     """
-    result = await _together_client().images.generate(
-        prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
-    )
+    if model_kwargs is None:
+        model_kwargs = {}
+    result = await _together_client().images.generate(prompt=prompt, model=model, **model_kwargs)
     if result.data[0].b64_json is not None:
         b64_bytes = base64.b64decode(result.data[0].b64_json)
         img = PIL.Image.open(io.BytesIO(b64_bytes))

pixeltable/globals.py CHANGED Viewed

@@ -249,13 +249,17 @@ def create_view(
     where: Optional[exprs.Expr] = None
     if isinstance(base, catalog.Table):
         tbl_version_path = base._tbl_version_path
+        sample_clause = None
     elif isinstance(base, DataFrame):
         base._validate_mutable('create_view', allow_select=True)
         if len(base._from_clause.tbls) > 1:
             raise excs.Error('Cannot create a view of a join')
         tbl_version_path = base._from_clause.tbls[0]
         where = base.where_clause
+        sample_clause = base.sample_clause
         select_list = base.select_list
+        if sample_clause is not None and not is_snapshot and not sample_clause.is_repeatable:
+            raise excs.Error('Non-snapshot views cannot be created with non-fractional or stratified sampling')
     else:
         raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
     assert isinstance(base, (catalog.Table, DataFrame))
@@ -280,6 +284,7 @@ def create_view(
         tbl_version_path,
         select_list=select_list,
         where=where,
+        sample_clause=sample_clause,
         additional_columns=additional_columns,
         is_snapshot=is_snapshot,
         iterator=iterator,

pixeltable/metadata/__init__.py CHANGED Viewed

@@ -8,15 +8,17 @@ from typing import Callable
 import sqlalchemy as sql
 from sqlalchemy import orm
+import pixeltable as pxt
+import pixeltable.exceptions as excs
 from pixeltable.utils.console_output import ConsoleLogger
 from .schema import SystemInfo, SystemInfoMd
 _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
+_logger = logging.getLogger('pixeltable')
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 36
+VERSION = 37
 def create_system_info(engine: sql.engine.Engine) -> None:
@@ -55,6 +57,13 @@ def upgrade_md(engine: sql.engine.Engine) -> None:
         system_info = session.query(SystemInfo).one().md
         md_version = system_info['schema_version']
         assert isinstance(md_version, int)
+        _logger.info(f'Current database version: {md_version}, installed version: {VERSION}')
+        if md_version > VERSION:
+            raise excs.Error(
+                'This Pixeltable database was created with a newer Pixeltable version '
+                f'than the one currently installed ({pxt.__version__}).\n'
+                'Please update to the latest Pixeltable version by running: pip install --upgrade pixeltable'
+            )
         if md_version == VERSION:
             return
         while md_version < VERSION:

pixeltable/metadata/converters/convert_36.py ADDED Viewed

@@ -0,0 +1,38 @@
+import logging
+from typing import Any, Optional
+from uuid import UUID
+import sqlalchemy as sql
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+_logger = logging.getLogger('pixeltable')
+@register_converter(version=36)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, table_md_updater=__update_table_md, substitution_fn=__substitute_md)
+def __update_table_md(table_md: dict, table_id: UUID) -> None:
+    """Update the view metadata to add the sample_clause field if it is missing
+    Args:
+        table_md (dict): copy of the original table metadata. this gets updated in place.
+        table_id (UUID): the table id
+    """
+    if table_md['view_md'] is None:
+        return
+    if 'sample_clause' not in table_md['view_md']:
+        table_md['view_md']['sample_clause'] = None
+        _logger.info(f'Updating view metadata for table: {table_id}')
+def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+    if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
+        if 'sample_clause' not in v:
+            v['sample_clause'] = None
+        return k, v
+    return None

pixeltable/metadata/notes.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    37: 'Add support for the sample() method on DataFrames',
     36: 'Added Table.lock_dummy',
     35: 'Track reference_tbl in ColumnRef',
     34: 'Set default value for is_pk field in column metadata to False',

pixeltable/metadata/schema.py CHANGED Viewed

@@ -147,6 +147,9 @@ class ViewMd:
     # filter predicate applied to the base table; view-only
     predicate: Optional[dict[str, Any]]
+    # sampling predicate applied to the base table; view-only
+    sample_clause: Optional[dict[str, Any]]
     # ComponentIterator subclass; only for component views
     iterator_class_fqn: Optional[str]

pixeltable/plan.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal, Optional, Sequence
+from typing import Any, Iterable, Literal, NamedTuple, Optional, Sequence
 from uuid import UUID
 import sqlalchemy as sql
@@ -12,6 +12,7 @@ import pixeltable as pxt
 from pixeltable import catalog, exceptions as excs, exec, exprs
 from pixeltable.catalog import Column, TableVersionHandle
 from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
+from pixeltable.utils.sample import sample_key
 def _is_agg_fn_call(e: exprs.Expr) -> bool:
@@ -75,6 +76,98 @@ class FromClause:
     tbls: list[catalog.TableVersionPath]
     join_clauses: list[JoinClause] = dataclasses.field(default_factory=list)
+    @property
+    def _first_tbl(self) -> catalog.TableVersionPath:
+        assert len(self.tbls) == 1
+        return self.tbls[0]
+@dataclasses.dataclass
+class SampleClause:
+    """Defines a sampling clause for a table."""
+    version: Optional[int]
+    n: Optional[int]
+    n_per_stratum: Optional[int]
+    fraction: Optional[float]
+    seed: Optional[int]
+    stratify_exprs: Optional[list[exprs.Expr]]
+    # This seed value is used if one is not supplied
+    DEFAULT_SEED = 0
+    # The version of the hashing algorithm used for ordering and fractional sampling.
+    CURRENT_VERSION = 1
+    def __post_init__(self) -> None:
+        """If no version was provided, provide the default version"""
+        if self.version is None:
+            self.version = self.CURRENT_VERSION
+        if self.seed is None:
+            self.seed = self.DEFAULT_SEED
+    @property
+    def is_stratified(self) -> bool:
+        """Check if the sampling is stratified"""
+        return self.stratify_exprs is not None and len(self.stratify_exprs) > 0
+    @property
+    def is_repeatable(self) -> bool:
+        """Return true if the same rows will continue to be sampled if source rows are added or deleted."""
+        return not self.is_stratified and self.fraction is not None
+    def display_str(self, inline: bool = False) -> str:
+        return str(self)
+    def as_dict(self) -> dict:
+        """Return a dictionary representation of the object"""
+        d = dataclasses.asdict(self)
+        d['_classname'] = self.__class__.__name__
+        if self.is_stratified:
+            d['stratify_exprs'] = [e.as_dict() for e in self.stratify_exprs]
+        return d
+    @classmethod
+    def from_dict(cls, d: dict) -> SampleClause:
+        """Create a SampleClause from a dictionary representation"""
+        d_cleaned = {key: value for key, value in d.items() if key != '_classname'}
+        s = cls(**d_cleaned)
+        if s.is_stratified:
+            s.stratify_exprs = [exprs.Expr.from_dict(e) for e in d_cleaned.get('stratify_exprs', [])]
+        return s
+    def __repr__(self) -> str:
+        s = ','.join(e.display_str(inline=True) for e in self.stratify_exprs)
+        return (
+            f'sample_{self.version}(n={self.n}, n_per_stratum={self.n_per_stratum}, '
+            f'fraction={self.fraction}, seed={self.seed}, [{s}])'
+        )
+    @classmethod
+    def fraction_to_md5_hex(cls, fraction: float) -> str:
+        """Return the string representation of an approximation (to ~1e-9) of a fraction of the total space
+        of md5 hash values.
+        This is used for fractional sampling.
+        """
+        # Maximum count for the upper 32 bits of MD5: 2^32
+        max_md5_value = (2**32) - 1
+        # Calculate the fraction of this value
+        threshold_int = max_md5_value * int(1_000_000_000 * fraction) // 1_000_000_000
+        # Convert to hexadecimal string with padding
+        return format(threshold_int, '08x') + 'ffffffffffffffffffffffff'
+class SamplingClauses(NamedTuple):
+    """Clauses provided when rewriting a SampleClause"""
+    where: exprs.Expr
+    group_by_clause: Optional[list[exprs.Expr]]
+    order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
+    limit: Optional[exprs.Expr]
+    sample_clause: Optional[SampleClause]
 class Analyzer:
     """
@@ -260,7 +353,7 @@ class Planner:
     # TODO: create an exec.CountNode and change this to create_count_plan()
     @classmethod
     def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None) -> sql.Select:
-        stmt = sql.select(sql.func.count())
+        stmt = sql.select(sql.func.count().label('all_count'))
         refd_tbl_ids: set[UUID] = set()
         if where_clause is not None:
             analyzer = cls.analyze(tbl, where_clause)
@@ -322,6 +415,13 @@ class Planner:
         )
         return plan
+    @classmethod
+    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols: Optional[int] = None) -> list[exprs.Expr]:
+        """Return list of RowidRef for the given number of associated rowids"""
+        if num_rowid_cols is None:
+            num_rowid_cols = target.get().num_rowid_columns()
+        return [exprs.RowidRef(target, i) for i in range(num_rowid_cols)]
     @classmethod
     def create_df_insert_plan(
         cls, tbl: catalog.TableVersion, df: 'pxt.DataFrame', ignore_errors: bool
@@ -591,7 +691,24 @@ class Planner:
         # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []
-        row_builder = exprs.RowBuilder(iterator_args, stored_cols, [])
+        # If this contains a sample specification, modify / create where, group_by, order_by, and limit clauses
+        from_clause = FromClause(tbls=[view.base])
+        where, group_by_clause, order_by_clause, limit, sample_clause = cls.create_sample_clauses(
+            from_clause, target.sample_clause, target.predicate, None, [], None
+        )
+        # if we're propagating an insert, we only want to see those base rows that were created for the current version
+        base_analyzer = Analyzer(
+            from_clause,
+            iterator_args,
+            where_clause=where,
+            group_by_clause=group_by_clause,
+            order_by_clause=order_by_clause,
+        )
+        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [])
+        if target.sample_clause is not None and base_analyzer.filter is not None:
+            raise excs.Error(f'Filter {base_analyzer.filter} not expressible in SQL')
         # execution plan:
         # 1. materialize exprs computed from the base that are needed for stored view columns
@@ -603,13 +720,22 @@ class Planner:
             for e in row_builder.default_eval_ctx.target_exprs
             if e.is_bound_by([view]) and not e.is_bound_by([view.base])
         ]
-        # if we're propagating an insert, we only want to see those base rows that were created for the current version
-        base_analyzer = Analyzer(FromClause(tbls=[view.base]), base_output_exprs, where_clause=target.predicate)
+        # Create a new analyzer reflecting exactly what is required from the base table
+        base_analyzer = Analyzer(
+            from_clause,
+            base_output_exprs,
+            where_clause=where,
+            group_by_clause=group_by_clause,
+            order_by_clause=order_by_clause,
+        )
         base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder,
             analyzer=base_analyzer,
             eval_ctx=base_eval_ctx,
+            limit=limit,
+            sample_clause=sample_clause,
             with_pk=True,
             exact_version_only=view.get_bases() if propagates_insert else [],
         )
@@ -692,6 +818,62 @@ class Planner:
         prefetch_node = exec.CachePrefetchNode(tbl_id, file_col_info, input_node)
         return prefetch_node
+    @classmethod
+    def create_sample_clauses(
+        cls,
+        from_clause: FromClause,
+        sample_clause: SampleClause,
+        where_clause: Optional[exprs.Expr],
+        group_by_clause: Optional[list[exprs.Expr]],
+        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]],
+        limit: Optional[exprs.Expr],
+    ) -> SamplingClauses:
+        """tuple[
+            exprs.Expr,
+            Optional[list[exprs.Expr]],
+            Optional[list[tuple[exprs.Expr, bool]]],
+            Optional[exprs.Expr],
+            Optional[SampleClause],
+        ]:"""
+        """Construct clauses required for sampling under various conditions.
+        If there is no sampling, then return the original clauses.
+        If the sample is stratified, then return only the group by clause. The rest of the
+        mechanism for stratified sampling is provided by the SampleSqlNode.
+        If the sample is non-stratified, then rewrite the query to accommodate the supplied where clause,
+        and provide the other clauses required for sampling
+        """
+        # If no sample clause, return the original clauses
+        if sample_clause is None:
+            return SamplingClauses(where_clause, group_by_clause, order_by_clause, limit, None)
+        # If the sample clause is stratified, create a group by clause
+        if sample_clause.is_stratified:
+            group_by = sample_clause.stratify_exprs
+            # Note that limit is not possible here
+            return SamplingClauses(where_clause, group_by, order_by_clause, None, sample_clause)
+        else:
+            # If non-stratified sampling, construct a where clause, order_by, and limit clauses
+            # Construct an expression for sorting rows and limiting row counts
+            s_key = sample_key(
+                exprs.Literal(sample_clause.seed), *cls.rowid_columns(from_clause._first_tbl.tbl_version)
+            )
+            # Construct a suitable where clause
+            where = where_clause
+            if sample_clause.fraction is not None:
+                fraction_md5_hex = exprs.Expr.from_object(
+                    sample_clause.fraction_to_md5_hex(float(sample_clause.fraction))
+                )
+                f_where = s_key < fraction_md5_hex
+                where = where & f_where if where is not None else f_where
+            order_by: list[tuple[exprs.Expr, bool]] = [(s_key, True)]
+            limit = exprs.Literal(sample_clause.n)
+            # Note that group_by is not possible here
+            return SamplingClauses(where, None, order_by, limit, None)
     @classmethod
     def create_query_plan(
         cls,
@@ -701,6 +883,7 @@ class Planner:
         group_by_clause: Optional[list[exprs.Expr]] = None,
         order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
         limit: Optional[exprs.Expr] = None,
+        sample_clause: Optional[SampleClause] = None,
         ignore_errors: bool = False,
         exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ) -> exec.ExecNode:
@@ -714,14 +897,22 @@ class Planner:
             order_by_clause = []
         if exact_version_only is None:
             exact_version_only = []
+        # Modify clauses to include sample clause
+        where, group_by_clause, order_by_clause, limit, sample = cls.create_sample_clauses(
+            from_clause, sample_clause, where_clause, group_by_clause, order_by_clause, limit
+        )
         analyzer = Analyzer(
             from_clause,
             select_list,
-            where_clause=where_clause,
+            where_clause=where,
             group_by_clause=group_by_clause,
             order_by_clause=order_by_clause,
         )
         row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
+        if sample_clause is not None and analyzer.filter is not None:
+            raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')
         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -732,6 +923,7 @@ class Planner:
             analyzer=analyzer,
             eval_ctx=eval_ctx,
             limit=limit,
+            sample_clause=sample,
             with_pk=True,
             exact_version_only=exact_version_only,
         )
@@ -747,6 +939,7 @@ class Planner:
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
         limit: Optional[exprs.Expr] = None,
+        sample_clause: Optional[SampleClause] = None,
         with_pk: bool = False,
         exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ) -> exec.ExecNode:
@@ -857,12 +1050,26 @@ class Planner:
                 sql_elements.contains_all(analyzer.select_list)
                 and sql_elements.contains_all(analyzer.grouping_exprs)
                 and isinstance(plan, exec.SqlNode)
-                and plan.to_cte() is not None
+                and plan.to_cte(keep_pk=(sample_clause is not None)) is not None
             ):
-                plan = exec.SqlAggregationNode(
-                    row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
-                )
+                if sample_clause is not None:
+                    plan = exec.SqlSampleNode(
+                        row_builder,
+                        input=plan,
+                        select_list=analyzer.select_list,
+                        stratify_exprs=analyzer.group_by_clause,
+                        sample_clause=sample_clause,
+                    )
+                else:
+                    plan = exec.SqlAggregationNode(
+                        row_builder,
+                        input=plan,
+                        select_list=analyzer.select_list,
+                        group_by_items=analyzer.group_by_clause,
+                    )
             else:
+                if sample_clause is not None:
+                    raise excs.Error('Sample clause not supported with Python aggregation')
                 input_sql_node = plan.get_node(exec.SqlNode)
                 assert combined_ordering is not None
                 input_sql_node.set_order_by(combined_ordering)

pixeltable 0.4.0rc1__py3-none-any.whl → 0.4.0rc3__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.0rc1__py3-none-any.whl → 0.4.0rc3__py3-none-any.whl