PyPI - pixeltable - Versions diffs - 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

pixeltable 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (47)

pixeltable/__version__.py +2 -2
pixeltable/catalog/table_version.py +2 -1
pixeltable/dataframe.py +52 -27
pixeltable/env.py +92 -4
pixeltable/exec/__init__.py +1 -1
pixeltable/exec/aggregation_node.py +3 -3
pixeltable/exec/cache_prefetch_node.py +13 -7
pixeltable/exec/component_iteration_node.py +3 -9
pixeltable/exec/data_row_batch.py +17 -5
pixeltable/exec/exec_node.py +32 -12
pixeltable/exec/expr_eval/__init__.py +1 -0
pixeltable/exec/expr_eval/evaluators.py +245 -0
pixeltable/exec/expr_eval/expr_eval_node.py +404 -0
pixeltable/exec/expr_eval/globals.py +114 -0
pixeltable/exec/expr_eval/row_buffer.py +76 -0
pixeltable/exec/expr_eval/schedulers.py +232 -0
pixeltable/exec/in_memory_data_node.py +2 -2
pixeltable/exec/row_update_node.py +14 -14
pixeltable/exec/sql_node.py +2 -2
pixeltable/exprs/column_ref.py +5 -1
pixeltable/exprs/data_row.py +50 -40
pixeltable/exprs/expr.py +57 -12
pixeltable/exprs/function_call.py +54 -19
pixeltable/exprs/inline_expr.py +12 -21
pixeltable/exprs/literal.py +25 -8
pixeltable/exprs/row_builder.py +23 -0
pixeltable/func/aggregate_function.py +4 -0
pixeltable/func/callable_function.py +54 -4
pixeltable/func/expr_template_function.py +5 -1
pixeltable/func/function.py +48 -7
pixeltable/func/query_template_function.py +16 -7
pixeltable/func/udf.py +7 -1
pixeltable/functions/__init__.py +1 -1
pixeltable/functions/anthropic.py +95 -21
pixeltable/functions/gemini.py +2 -6
pixeltable/functions/openai.py +207 -28
pixeltable/globals.py +1 -1
pixeltable/plan.py +24 -9
pixeltable/store.py +6 -0
pixeltable/type_system.py +3 -3
pixeltable/utils/arrow.py +3 -3
{pixeltable-0.3.0.dist-info → pixeltable-0.3.1.dist-info}/METADATA +3 -1
{pixeltable-0.3.0.dist-info → pixeltable-0.3.1.dist-info}/RECORD +46 -41
pixeltable/exec/expr_eval_node.py +0 -232
{pixeltable-0.3.0.dist-info → pixeltable-0.3.1.dist-info}/LICENSE +0 -0
{pixeltable-0.3.0.dist-info → pixeltable-0.3.1.dist-info}/WHEEL +0 -0
{pixeltable-0.3.0.dist-info → pixeltable-0.3.1.dist-info}/entry_points.txt +0 -0

pixeltable/functions/openai.py CHANGED Viewed

@@ -6,14 +6,18 @@ the [Working with OpenAI](https://pixeltable.readme.io/docs/working-with-openai)
 """
 import base64
+import datetime
 import io
 import json
+import logging
 import pathlib
+import re
 import uuid
-from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union, cast, Any, Type
-import numpy as np
 import PIL.Image
+import httpx
+import numpy as np
 import tenacity
 import pixeltable as pxt
@@ -24,15 +28,28 @@ from pixeltable.utils.code import local_public_names
 if TYPE_CHECKING:
     import openai
+_logger = logging.getLogger('pixeltable')
 @env.register_client('openai')
-def _(api_key: str) -> 'openai.OpenAI':
+def _(api_key: str) -> tuple['openai.OpenAI', 'openai.AsyncOpenAI']:
     import openai
-    return openai.OpenAI(api_key=api_key)
+    return (
+        openai.OpenAI(api_key=api_key),
+        openai.AsyncOpenAI(
+            api_key=api_key,
+            # recommended to increase limits for async client to avoid connection errors
+            http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
+        )
+    )
 def _openai_client() -> 'openai.OpenAI':
-    return env.Env.get().get_client('openai')
+    return env.Env.get().get_client('openai')[0]
+def _async_openai_client() -> 'openai.AsyncOpenAI':
+    return env.Env.get().get_client('openai')[1]
 # Exponential backoff decorator using tenacity.
@@ -47,13 +64,128 @@ def _retry(fn: Callable) -> Callable:
     )(fn)
+# models that share rate limits; see https://platform.openai.com/settings/organization/limits for details
+_shared_rate_limits = {
+    'gpt-4-turbo': [
+        'gpt-4-turbo',
+        'gpt-4-turbo-latest',
+        'gpt-4-turbo-2024-04-09',
+        'gpt-4-turbo-preview',
+        'gpt-4-0125-preview',
+        'gpt-4-1106-preview'
+    ],
+    'gpt-4o': [
+        'gpt-4o',
+        'gpt-4o-latest',
+        'gpt-4o-2024-05-13',
+        'gpt-4o-2024-08-06',
+        'gpt-4o-2024-11-20',
+        'gpt-4o-audio-preview',
+        'gpt-4o-audio-preview-2024-10-01',
+        'gpt-4o-audio-preview-2024-12-17'
+    ],
+    'gpt-4o-mini': [
+        'gpt-4o-mini',
+        'gpt-4o-mini-latest',
+        'gpt-4o-mini-2024-07-18',
+        'gpt-4o-mini-audio-preview',
+        'gpt-4o-mini-audio-preview-2024-12-17'
+    ],
+    'gpt-4o-mini-realtime-preview': [
+        'gpt-4o-mini-realtime-preview',
+        'gpt-4o-mini-realtime-preview-latest',
+        'gpt-4o-mini-realtime-preview-2024-12-17'
+    ]
+}
+def _resource_pool(model: str) -> str:
+    for model_family, models in _shared_rate_limits.items():
+        if model in models:
+            return f'rate-limits:openai:{model_family}'
+    return f'rate-limits:openai:{model}'
+class OpenAIRateLimitsInfo(env.RateLimitsInfo):
+    retryable_errors: tuple[Type[Exception], ...]
+    def __init__(self, get_request_resources: Callable[..., dict[str, int]]):
+        super().__init__(get_request_resources)
+        import openai
+        self.retryable_errors = (
+            openai.RateLimitError, openai.APITimeoutError, openai.UnprocessableEntityError, openai.InternalServerError
+        )
+    def get_retry_delay(self, exc: Exception) -> Optional[float]:
+        import openai
+        if not isinstance(exc, self.retryable_errors):
+            return None
+        assert isinstance(exc, openai.APIError)
+        return 1.0
+# RE pattern for duration in '*-reset' headers;
+# examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
+_header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
+def _parse_header_duration(duration_str):
+    match = _header_duration_pattern.match(duration_str)
+    if not match:
+        raise ValueError("Invalid duration format")
+    days = int(match.group(1) or 0)
+    hours = int(match.group(2) or 0)
+    milliseconds = int(match.group(3) or 0)
+    minutes = int(match.group(4) or 0)
+    seconds = float(match.group(5) or 0)
+    return datetime.timedelta(
+        days=days,
+        hours=hours,
+        minutes=minutes,
+        seconds=seconds,
+        milliseconds=milliseconds
+    )
+def _get_header_info(
+        headers: httpx.Headers, *, requests: bool = True, tokens: bool = True
+) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
+    assert requests or tokens
+    now = datetime.datetime.now(tz=datetime.timezone.utc)
+    requests_info: Optional[tuple[int, int, datetime.datetime]] = None
+    if requests:
+        requests_limit_str = headers.get('x-ratelimit-limit-requests')
+        requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+        requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
+        requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+        requests_reset_str = headers.get('x-ratelimit-reset-requests')
+        requests_reset_ts = now + _parse_header_duration(requests_reset_str)
+        requests_info = (requests_limit, requests_remaining, requests_reset_ts)
+    tokens_info: Optional[tuple[int, int, datetime.datetime]] = None
+    if tokens:
+        tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
+        tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
+        tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
+        tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
+        tokens_reset_str = headers.get('x-ratelimit-reset-tokens')
+        tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
+        tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
+    return requests_info, tokens_info
 #####################################
 # Audio Endpoints
 @pxt.udf
 def speech(
-    input: str, *, model: str, voice: str, response_format: Optional[str] = None, speed: Optional[float] = None
+        input: str, *, model: str, voice: str, response_format: Optional[str] = None, speed: Optional[float] = None
 ) -> pxt.Audio:
     """
     Generates audio from the input text.
@@ -176,8 +308,24 @@ def translations(
 # Chat Endpoints
+def _chat_completions_get_request_resources(
+        messages: list, max_tokens: Optional[int], n: Optional[int]
+) -> dict[str, int]:
+    completion_tokens = n * max_tokens
+    num_tokens = 0.0
+    for message in messages:
+        num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
+        for key, value in message.items():
+            num_tokens += len(value) / 4
+            if key == "name":  # if there's a name, the role is omitted
+                num_tokens -= 1  # role is always required and always 1 token
+    num_tokens += 2  # every reply is primed with <im_start>assistant
+    return {'requests': 1, 'tokens': int(num_tokens) + completion_tokens}
 @pxt.udf
-def chat_completions(
+async def chat_completions(
     messages: list,
     *,
     model: str,
@@ -185,8 +333,8 @@ def chat_completions(
     logit_bias: Optional[dict[str, int]] = None,
     logprobs: Optional[bool] = None,
     top_logprobs: Optional[int] = None,
-    max_tokens: Optional[int] = None,
-    n: Optional[int] = None,
+    max_tokens: Optional[int] = 1024,
+    n: Optional[int] = 1,
     presence_penalty: Optional[float] = None,
     response_format: Optional[dict] = None,
     seed: Optional[int] = None,
@@ -226,7 +374,6 @@ def chat_completions(
             ]
             tbl['response'] = chat_completions(messages, model='gpt-4o-mini')
     """
     if tools is not None:
         tools = [
             {
@@ -253,7 +400,8 @@ def chat_completions(
     if tool_choice is not None and not tool_choice['parallel_tool_calls']:
         extra_body = {'parallel_tool_calls': False}
-    result = _retry(_openai_client().chat.completions.create)(
+    # cast(Any, ...): avoid mypy errors
+    result = await _async_openai_client().chat.completions.with_raw_response.create(
         messages=messages,
         model=model,
         frequency_penalty=_opt(frequency_penalty),
@@ -263,17 +411,25 @@ def chat_completions(
         max_tokens=_opt(max_tokens),
         n=_opt(n),
         presence_penalty=_opt(presence_penalty),
-        response_format=_opt(response_format),
+        response_format=_opt(cast(Any, response_format)),
         seed=_opt(seed),
         stop=_opt(stop),
         temperature=_opt(temperature),
         top_p=_opt(top_p),
-        tools=_opt(tools),
-        tool_choice=_opt(tool_choice_),
+        tools=_opt(cast(Any, tools)),
+        tool_choice=_opt(cast(Any, tool_choice_)),
         user=_opt(user),
+        timeout=10,
         extra_body=extra_body,
     )
-    return result.dict()
+    resource_pool = _resource_pool(model)
+    requests_info, tokens_info = _get_header_info(result.headers)
+    rate_limits_info = env.Env.get().get_resource_pool_info(resource_pool, lambda: OpenAIRateLimitsInfo(
+        _chat_completions_get_request_resources))
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    return json.loads(result.text)
 @pxt.udf
@@ -330,8 +486,13 @@ _embedding_dimensions_cache: dict[str, int] = {
 }
+def _embeddings_get_request_resources(input: list[str]) -> dict[str, int]:
+    input_len = sum(len(s) for s in input)
+    return {'requests': 1, 'tokens': int(input_len / 4)}
 @pxt.udf(batch_size=32)
-def embeddings(
+async def embeddings(
     input: Batch[str], *, model: str, dimensions: Optional[int] = None, user: Optional[str] = None
 ) -> Batch[pxt.Array[(None,), pxt.Float]]:
     """
@@ -361,10 +522,16 @@ def embeddings(
         >>> tbl['embed'] = embeddings(tbl.text, model='text-embedding-3-small')
     """
-    result = _retry(_openai_client().embeddings.create)(
+    _logger.debug(f'embeddings: batch_size={len(input)}')
+    result = await _async_openai_client().embeddings.with_raw_response.create(
         input=input, model=model, dimensions=_opt(dimensions), user=_opt(user), encoding_format='float'
     )
-    return [np.array(data.embedding, dtype=np.float64) for data in result.data]
+    resource_pool = _resource_pool(model)
+    requests_info, tokens_info = _get_header_info(result.headers)
+    rate_limits_info = env.Env.get().get_resource_pool_info(
+        resource_pool, lambda: OpenAIRateLimitsInfo(_embeddings_get_request_resources))
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    return [np.array(data['embedding'], dtype=np.float64) for data in json.loads(result.content)['data']]
 @embeddings.conditional_return_type
@@ -385,7 +552,7 @@ def _(model: str, dimensions: Optional[int] = None) -> pxt.ArrayType:
 def image_generations(
     prompt: str,
     *,
-    model: Optional[str] = None,
+    model: str = 'dall-e-2',
     quality: Optional[str] = None,
     size: Optional[str] = None,
     style: Optional[str] = None,
@@ -441,7 +608,7 @@ def _(size: Optional[str] = None) -> pxt.ImageType:
     if x_pos == -1:
         return pxt.ImageType()
     try:
-        width, height = int(size[:x_pos]), int(size[x_pos + 1 :])
+        width, height = int(size[:x_pos]), int(size[x_pos + 1:])
     except ValueError:
         return pxt.ImageType()
     return pxt.ImageType(size=(width, height))
@@ -452,7 +619,7 @@ def _(size: Optional[str] = None) -> pxt.ImageType:
 @pxt.udf
-def moderations(input: str, *, model: Optional[str] = None) -> dict:
+def moderations(input: str, *, model: str = 'omni-moderation-latest') -> dict:
     """
     Classifies if text is potentially harmful.
@@ -482,6 +649,18 @@ def moderations(input: str, *, model: Optional[str] = None) -> dict:
     return result.dict()
+# @speech.resource_pool
+# @transcriptions.resource_pool
+# @translations.resource_pool
+@chat_completions.resource_pool
+# @vision.resource_pool
+@embeddings.resource_pool
+# @image_generations.resource_pool
+# @moderations.resource_pool
+def _(model: str) -> str:
+    return _resource_pool(model)
 def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
     """Converts an OpenAI response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
     return tools._invoke(_openai_response_to_pxt_tool_calls(response))
@@ -489,15 +668,15 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
 @pxt.udf
 def _openai_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
+    if 'tool_calls' not in response['choices'][0]['message'] or response['choices'][0]['message']['tool_calls'] is None:
+        return None
     openai_tool_calls = response['choices'][0]['message']['tool_calls']
-    if openai_tool_calls is not None:
-        return {
-            tool_call['function']['name']: {
-                'args': json.loads(tool_call['function']['arguments'])
-            }
-            for tool_call in openai_tool_calls
+    return {
+        tool_call['function']['name']: {
+            'args': json.loads(tool_call['function']['arguments'])
         }
-    return None
+        for tool_call in openai_tool_calls
+    }
 _T = TypeVar('_T')

pixeltable/globals.py CHANGED Viewed

@@ -817,4 +817,4 @@ def configure_logging(
 def array(elements: Iterable) -> exprs.Expr:
-    return exprs.InlineArray(elements)
+    return exprs.Expr.from_array(elements)

pixeltable/plan.py CHANGED Viewed

@@ -5,6 +5,7 @@ import enum
 from typing import Any, Iterable, Optional, Sequence, Literal
 from uuid import UUID
 import sqlalchemy as sql
 import pixeltable as pxt
@@ -166,10 +167,13 @@ class Analyzer:
             raise excs.Error(
                 f'Invalid non-aggregate expression in aggregate query: {self.select_list[is_agg_output.index(False)]}')
-        # check that filter doesn't contain aggregates
+        # check that Where clause and filter doesn't contain aggregates
+        if self.sql_where_clause is not None:
+            if any(_is_agg_fn_call(e) for e in self.sql_where_clause.subexprs(expr_class=exprs.FunctionCall)):
+                raise excs.Error(f'where() cannot contain aggregate functions: {self.sql_where_clause}')
         if self.filter is not None:
             if any(_is_agg_fn_call(e) for e in self.filter.subexprs(expr_class=exprs.FunctionCall)):
-                raise excs.Error(f'Filter cannot contain aggregate functions: {self.filter}')
+                raise excs.Error(f'where() cannot contain aggregate functions: {self.filter}')
         # check that grouping exprs don't contain aggregates and can be expressed as SQL (we perform sort-based
         # aggregation and rely on the SqlScanNode returning data in the correct order)
@@ -283,7 +287,8 @@ class Planner:
         computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         if len(computed_exprs) > 0:
             # add an ExprEvalNode when there are exprs to compute
-            plan = exec.ExprEvalNode(row_builder, computed_exprs, plan.output_exprs, input=plan)
+            plan = exec.ExprEvalNode(
+                row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False)
         stored_col_info = row_builder.output_slot_idxs()
         stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
@@ -548,7 +553,7 @@ class Planner:
             plan = exec.ComponentIterationNode(target, plan)
         if len(view_output_exprs) > 0:
             plan = exec.ExprEvalNode(
-                row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs,input=plan)
+                row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan)
         stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
         plan.set_stored_img_cols(stored_img_col_info)
@@ -750,10 +755,12 @@ class Planner:
             ctx.batch_size = 16
             # do aggregation in SQL if all agg exprs can be translated
-            if (sql_elements.contains_all(analyzer.select_list)
-                    and sql_elements.contains_all(analyzer.grouping_exprs)
-                    and isinstance(plan, exec.SqlNode)
-                    and plan.to_cte() is not None):
+            if (
+                sql_elements.contains_all(analyzer.select_list)
+                and sql_elements.contains_all(analyzer.grouping_exprs)
+                and isinstance(plan, exec.SqlNode)
+                and plan.to_cte() is not None
+            ):
                 plan = exec.SqlAggregationNode(
                     row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause)
             else:
@@ -770,14 +777,22 @@ class Planner:
                 # we need an ExprEvalNode to evaluate the remaining output exprs
                 plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, sql_exprs, input=plan)
             # we're returning everything to the user, so we might as well do it in a single batch
+            # TODO: return smaller batches in order to increase inter-ExecNode parallelism
             ctx.batch_size = 0
+        sql_node = plan.get_node(exec.SqlNode)
         if len(analyzer.order_by_clause) > 0:
             # we have the last SqlNode we created produce the ordering
-            sql_node = plan.get_node(exec.SqlNode)
             assert sql_node is not None
             sql_node.set_order_by(analyzer.order_by_clause)
+        # if we don't need an ordered result, tell the ExprEvalNode not to maintain input order (which allows us to
+        # return batches earlier)
+        if sql_node is not None and len(sql_node.order_by_clause) == 0:
+            expr_eval_node = plan.get_node(exec.ExprEvalNode)
+            if expr_eval_node is not None:
+                expr_eval_node.set_input_order(False)
         if limit is not None:
             plan.set_limit(limit)

pixeltable/store.py CHANGED Viewed

@@ -229,6 +229,7 @@ class StoreBase:
             sql.exc.DBAPIError if there was a SQL error during execution
             excs.Error if on_error='abort' and there was an exception during row evaluation
         """
+        assert col.tbl.id == self.tbl_version.id
         num_excs = 0
         num_rows = 0
@@ -249,6 +250,7 @@ class StoreBase:
         try:
             # insert rows from exec_plan into temp table
+            # TODO: unify the table row construction logic with RowBuilder.create_table_row()
             for row_batch in exec_plan:
                 num_rows += len(row_batch)
                 tbl_rows: list[dict[str, Any]] = []
@@ -272,6 +274,10 @@ class StoreBase:
                             tbl_row[col.sa_errortype_col.name] = error_type
                             tbl_row[col.sa_errormsg_col.name] = error_msg
                         else:
+                            if col.col_type.is_image_type() and result_row.file_urls[value_expr_slot_idx] is None:
+                                # we have yet to store this image
+                                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
+                                result_row.flush_img(value_expr_slot_idx, filepath)
                             val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
                             if col.col_type.is_media_type():
                                 val = self._move_tmp_media_file(val, col, result_row.pk[-1])

pixeltable/type_system.py CHANGED Viewed

@@ -246,8 +246,8 @@ class ColumnType:
             col_type = ArrayType.from_literal(val, nullable=nullable)
             if col_type is not None:
                 return col_type
-            # this could still be json-serializable
-        if isinstance(val, dict) or isinstance(val, list) or isinstance(val, np.ndarray) or isinstance(val, pydantic.BaseModel):
+        # this could still be json-serializable
+        if isinstance(val, (list, tuple, dict, np.ndarray, pydantic.BaseModel)):
             try:
                 JsonType().validate_literal(val)
                 return JsonType(nullable=nullable)
@@ -866,7 +866,7 @@ class ArrayType(ColumnType):
                 continue
             if n1 != n2:
                 return False
-        return val.dtype == self.numpy_dtype()
+        return np.issubdtype(val.dtype, self.numpy_dtype())
     def _to_json_schema(self) -> dict[str, Any]:
         return {

pixeltable/utils/arrow.py CHANGED Viewed

@@ -75,7 +75,7 @@ def to_arrow_schema(pixeltable_schema: dict[str, Any]) -> pa.Schema:
     return pa.schema((name, to_arrow_type(typ)) for name, typ in pixeltable_schema.items())  # type: ignore[misc]
-def to_pydict(batch: pa.RecordBatch) -> dict[str, Union[list, np.ndarray]]:
+def to_pydict(batch: Union[pa.Table, pa.RecordBatch]) -> dict[str, Union[list, np.ndarray]]:
     """Convert a RecordBatch to a dictionary of lists, unlike pa.lib.RecordBatch.to_pydict,
     this function will not convert numpy arrays to lists, and will preserve the original numpy dtype.
     """
@@ -84,7 +84,7 @@ def to_pydict(batch: pa.RecordBatch) -> dict[str, Union[list, np.ndarray]]:
         col = batch.column(k)
         if isinstance(col.type, pa.FixedShapeTensorType):
             # treat array columns as numpy arrays to easily preserve numpy type
-            out[name] = col.to_numpy(zero_copy_only=False)
+            out[name] = col.to_numpy(zero_copy_only=False)  # type: ignore[call-arg]
         else:
             # for the rest, use pydict to preserve python types
             out[name] = col.to_pylist()
@@ -92,7 +92,7 @@ def to_pydict(batch: pa.RecordBatch) -> dict[str, Union[list, np.ndarray]]:
     return out
-def iter_tuples(batch: pa.RecordBatch) -> Iterator[dict[str, Any]]:
+def iter_tuples(batch: Union[pa.Table, pa.RecordBatch]) -> Iterator[dict[str, Any]]:
     """Convert a RecordBatch to an iterator of dictionaries. also works with pa.Table and pa.RowGroup"""
     pydict = to_pydict(batch)
     assert len(pydict) > 0, 'empty record batch'

{pixeltable-0.3.0.dist-info → pixeltable-0.3.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pixeltable
-Version: 0.3.0
+Version: 0.3.1
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 Home-page: https://pixeltable.com/
 License: Apache-2.0
@@ -27,11 +27,13 @@ Requires-Dist: av (>=10.0.0)
 Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
 Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
 Requires-Dist: ftfy (>=6.2.0,<7.0.0)
+Requires-Dist: httpx (>=0.27)
 Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
 Requires-Dist: jmespath (>=1.0.1,<2.0.0)
 Requires-Dist: jsonschema (>=4.1.0)
 Requires-Dist: lxml (>=5.0)
 Requires-Dist: more-itertools (>=10.2,<11.0)
+Requires-Dist: nest_asyncio (>=1.5)
 Requires-Dist: numpy (>=1.25,<2.0)
 Requires-Dist: pandas (>=2.0,<3.0)
 Requires-Dist: pgvector (>=0.2.1,<0.3.0)

pixeltable 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

Potentially problematic release.

pixeltable 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl