PyPI - pixeltable - Versions diffs - 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl - Mend

pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (69) hide show

pixeltable/__init__.py +4 -2
pixeltable/catalog/__init__.py +1 -1
pixeltable/catalog/catalog.py +7 -9
pixeltable/catalog/column.py +49 -0
pixeltable/catalog/insertable_table.py +0 -7
pixeltable/catalog/schema_object.py +1 -14
pixeltable/catalog/table.py +180 -67
pixeltable/catalog/table_version.py +42 -146
pixeltable/catalog/table_version_path.py +6 -5
pixeltable/catalog/view.py +2 -1
pixeltable/config.py +24 -9
pixeltable/dataframe.py +5 -6
pixeltable/env.py +113 -21
pixeltable/exec/aggregation_node.py +1 -1
pixeltable/exec/cache_prefetch_node.py +4 -3
pixeltable/exec/exec_node.py +0 -8
pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
pixeltable/exec/expr_eval/globals.py +1 -0
pixeltable/exec/expr_eval/schedulers.py +52 -19
pixeltable/exec/in_memory_data_node.py +2 -3
pixeltable/exprs/array_slice.py +2 -2
pixeltable/exprs/data_row.py +15 -2
pixeltable/exprs/expr.py +9 -9
pixeltable/exprs/function_call.py +61 -23
pixeltable/exprs/globals.py +1 -2
pixeltable/exprs/json_path.py +3 -3
pixeltable/exprs/row_builder.py +25 -21
pixeltable/exprs/string_op.py +3 -3
pixeltable/func/expr_template_function.py +6 -3
pixeltable/func/query_template_function.py +2 -2
pixeltable/func/signature.py +30 -3
pixeltable/func/tools.py +2 -2
pixeltable/functions/anthropic.py +76 -27
pixeltable/functions/deepseek.py +5 -1
pixeltable/functions/gemini.py +11 -2
pixeltable/functions/globals.py +2 -2
pixeltable/functions/huggingface.py +6 -12
pixeltable/functions/llama_cpp.py +9 -1
pixeltable/functions/openai.py +76 -55
pixeltable/functions/video.py +59 -6
pixeltable/functions/vision.py +2 -2
pixeltable/globals.py +86 -13
pixeltable/io/datarows.py +3 -3
pixeltable/io/fiftyone.py +7 -7
pixeltable/io/globals.py +3 -3
pixeltable/io/hf_datasets.py +4 -4
pixeltable/io/label_studio.py +2 -1
pixeltable/io/pandas.py +6 -6
pixeltable/io/parquet.py +3 -3
pixeltable/io/table_data_conduit.py +2 -2
pixeltable/io/utils.py +2 -2
pixeltable/iterators/audio.py +3 -2
pixeltable/iterators/document.py +2 -8
pixeltable/iterators/video.py +49 -9
pixeltable/plan.py +0 -16
pixeltable/share/packager.py +51 -42
pixeltable/share/publish.py +134 -7
pixeltable/store.py +5 -25
pixeltable/type_system.py +5 -8
pixeltable/utils/__init__.py +2 -2
pixeltable/utils/arrow.py +5 -5
pixeltable/utils/description_helper.py +3 -3
pixeltable/utils/iceberg.py +1 -2
pixeltable/utils/media_store.py +131 -66
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
{pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0

pixeltable/functions/huggingface.py CHANGED Viewed

@@ -63,13 +63,10 @@ def sentence_transformer(
 @sentence_transformer.conditional_return_type
 def _(model_id: str) -> ts.ArrayType:
-    try:
-        from sentence_transformers import SentenceTransformer
+    from sentence_transformers import SentenceTransformer
-        model = _lookup_model(model_id, SentenceTransformer)
-        return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
-    except ImportError:
-        return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
+    model = _lookup_model(model_id, SentenceTransformer)
+    return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
 @pxt.udf
@@ -201,13 +198,10 @@ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,
 @clip.conditional_return_type
 def _(model_id: str) -> ts.ArrayType:
-    try:
-        from transformers import CLIPModel
+    from transformers import CLIPModel
-        model = _lookup_model(model_id, CLIPModel.from_pretrained)
-        return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
-    except ImportError:
-        return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
+    model = _lookup_model(model_id, CLIPModel.from_pretrained)
+    return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
 @pxt.udf(batch_size=4)

pixeltable/functions/llama_cpp.py CHANGED Viewed

@@ -93,10 +93,18 @@ def _lookup_pretrained_model(repo_id: str, filename: Optional[str], n_gpu_layers
     return _model_cache[key]
-_model_cache: dict[tuple[str, str, int], Any] = {}
+_model_cache: dict[tuple[str, str, int], 'llama_cpp.Llama'] = {}
 _IS_GPU_AVAILABLE: Optional[bool] = None
+def cleanup() -> None:
+    for model in _model_cache.values():
+        if model._sampler is not None:
+            model._sampler.close()
+        model.close()
+    _model_cache.clear()
 __all__ = local_public_names(__name__)

pixeltable/functions/openai.py CHANGED Viewed

@@ -23,6 +23,7 @@ import pixeltable as pxt
 from pixeltable import env, exprs, type_system as ts
 from pixeltable.func import Batch, Tools
 from pixeltable.utils.code import local_public_names
+from pixeltable.utils.media_store import TempStore
 if TYPE_CHECKING:
     import openai
@@ -91,6 +92,49 @@ def _rate_limits_pool(model: str) -> str:
     return f'rate-limits:openai:{model}'
+# RE pattern for duration in '*-reset' headers;
+# examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
+_header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
+def _parse_header_duration(duration_str: str) -> datetime.timedelta:
+    match = _header_duration_pattern.match(duration_str)
+    if not match:
+        raise ValueError(f'Invalid duration format: {duration_str}')
+    days = int(match.group(1) or 0)
+    hours = int(match.group(2) or 0)
+    milliseconds = int(match.group(3) or 0)
+    minutes = int(match.group(4) or 0)
+    seconds = float(match.group(5) or 0)
+    return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
+def _get_header_info(
+    headers: httpx.Headers,
+) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
+    now = datetime.datetime.now(tz=datetime.timezone.utc)
+    requests_limit_str = headers.get('x-ratelimit-limit-requests')
+    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+    requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
+    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+    requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s')  # Default to 5 seconds
+    requests_reset_ts = now + _parse_header_duration(requests_reset_str)
+    requests_info = (requests_limit, requests_remaining, requests_reset_ts)
+    tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
+    tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
+    tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
+    tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
+    tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s')  # Default to 5 seconds
+    tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
+    tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
+    return requests_info, tokens_info
 class OpenAIRateLimitsInfo(env.RateLimitsInfo):
     retryable_errors: tuple[Type[Exception], ...]
@@ -111,61 +155,24 @@ class OpenAIRateLimitsInfo(env.RateLimitsInfo):
             openai.InternalServerError,
         )
+    def record_exc(self, exc: Exception) -> None:
+        import openai
+        _ = isinstance(exc, openai.APIError)
+        if not isinstance(exc, openai.APIError) or not hasattr(exc, 'response') or not hasattr(exc.response, 'headers'):
+            return
+        requests_info, tokens_info = _get_header_info(exc.response.headers)
+        _logger.debug(f'record_exc(): requests_info={requests_info} tokens_info={tokens_info}')
+        self.record(requests=requests_info, tokens=tokens_info)
+        self.has_exc = True
     def get_retry_delay(self, exc: Exception) -> Optional[float]:
         import openai
         if not isinstance(exc, self.retryable_errors):
             return None
         assert isinstance(exc, openai.APIError)
-        return 1.0
-# RE pattern for duration in '*-reset' headers;
-# examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
-_header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
-def _parse_header_duration(duration_str: str) -> datetime.timedelta:
-    match = _header_duration_pattern.match(duration_str)
-    if not match:
-        raise ValueError(f'Invalid duration format: {duration_str}')
-    days = int(match.group(1) or 0)
-    hours = int(match.group(2) or 0)
-    milliseconds = int(match.group(3) or 0)
-    minutes = int(match.group(4) or 0)
-    seconds = float(match.group(5) or 0)
-    return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
-def _get_header_info(
-    headers: httpx.Headers, *, requests: bool = True, tokens: bool = True
-) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
-    assert requests or tokens
-    now = datetime.datetime.now(tz=datetime.timezone.utc)
-    requests_info: Optional[tuple[int, int, datetime.datetime]] = None
-    if requests:
-        requests_limit_str = headers.get('x-ratelimit-limit-requests')
-        requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
-        requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
-        requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-        requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s')  # Default to 5 seconds
-        requests_reset_ts = now + _parse_header_duration(requests_reset_str)
-        requests_info = (requests_limit, requests_remaining, requests_reset_ts)
-    tokens_info: Optional[tuple[int, int, datetime.datetime]] = None
-    if tokens:
-        tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
-        tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
-        tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
-        tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
-        tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s')  # Default to 5 seconds
-        tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
-        tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
-    return requests_info, tokens_info
+        return super().get_retry_delay(exc)
 #####################################
@@ -210,7 +217,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
     content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
     ext = model_kwargs.get('response_format', 'mp3')
-    output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
+    output_filename = str(TempStore.create_path(extension=f'.{ext}'))
     content.write_to_file(output_filename)
     return output_filename
@@ -355,6 +362,7 @@ async def chat_completions(
     model_kwargs: Optional[dict[str, Any]] = None,
     tools: Optional[list[dict[str, Any]]] = None,
     tool_choice: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> dict:
     """
     Creates a model response for the given chat conversation.
@@ -418,7 +426,8 @@ async def chat_completions(
     )
     requests_info, tokens_info = _get_header_info(result.headers)
-    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
     return json.loads(result.text)
@@ -461,7 +470,12 @@ def _vision_get_request_resources(
 @pxt.udf
 async def vision(
-    prompt: str, image: PIL.Image.Image, *, model: str, model_kwargs: Optional[dict[str, Any]] = None
+    prompt: str,
+    image: PIL.Image.Image,
+    *,
+    model: str,
+    model_kwargs: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> str:
     """
     Analyzes an image with the OpenAI vision capability. This is a convenience function that takes an image and
@@ -521,8 +535,10 @@ async def vision(
         **model_kwargs,
     )
+    # _logger.debug(f'vision(): headers={result.headers}')
     requests_info, tokens_info = _get_header_info(result.headers)
-    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
     result = json.loads(result.text)
     return result['choices'][0]['message']['content']
@@ -545,7 +561,11 @@ def _embeddings_get_request_resources(input: list[str]) -> dict[str, int]:
 @pxt.udf(batch_size=32)
 async def embeddings(
-    input: Batch[str], *, model: str, model_kwargs: Optional[dict[str, Any]] = None
+    input: Batch[str],
+    *,
+    model: str,
+    model_kwargs: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> Batch[pxt.Array[(None,), pxt.Float]]:
     """
     Creates an embedding vector representing the input text.
@@ -592,7 +612,8 @@ async def embeddings(
         input=input, model=model, encoding_format='float', **model_kwargs
     )
     requests_info, tokens_info = _get_header_info(result.headers)
-    rate_limits_info.record(requests=requests_info, tokens=tokens_info)
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
+    rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
     return [np.array(data['embedding'], dtype=np.float64) for data in json.loads(result.content)['data']]

pixeltable/functions/video.py CHANGED Viewed

@@ -9,10 +9,10 @@ import numpy as np
 import PIL.Image
 import pixeltable as pxt
-from pixeltable import env
 from pixeltable.utils.code import local_public_names
+from pixeltable.utils.media_store import TempStore
-_format_defaults = {  # format -> (codec, ext)
+_format_defaults: dict[str, tuple[str, str]] = {  # format -> (codec, ext)
     'wav': ('pcm_s16le', 'wav'),
     'mp3': ('libmp3lame', 'mp3'),
     'flac': ('flac', 'flac'),
@@ -40,6 +40,59 @@ _format_defaults = {  # format -> (codec, ext)
 class make_video(pxt.Aggregator):
     """
     Aggregator that creates a video from a sequence of images.
+    Creates an H.264 encoded MP4 video from a sequence of PIL Image frames. This aggregator requires the input
+    frames to be ordered (typically by frame position) and is commonly used with `FrameIterator` views to
+    reconstruct videos from processed frames.
+    Args:
+        fps: Frames per second for the output video. Default is 25. This is set when the aggregator is created.
+    Returns:
+    - A `pxt.Video` containing the created video file path.
+    Examples:
+        Create a video from frames extracted using FrameIterator:
+        >>> import pixeltable as pxt
+        >>> from pixeltable.functions.video import make_video
+        >>> from pixeltable.iterators import FrameIterator
+        >>>
+        >>> # Create base table for videos
+        >>> videos_table = pxt.create_table('videos', {'video': pxt.Video})
+        >>>
+        >>> # Create view to extract frames
+        >>> frames_view = pxt.create_view(
+        ...     'video_frames',
+        ...     videos_table,
+        ...     iterator=FrameIterator.create(video=videos_table.video, fps=1)
+        ... )
+        >>>
+        >>> # Reconstruct video from frames
+        >>> frames_view.group_by(videos_table).select(
+        ...     make_video(frames_view.pos, frames_view.frame)
+        ... ).show()
+        Apply transformations to frames before creating a video:
+        >>> # Add computed column with transformed frames
+        >>> frames_view.add_computed_column(
+        ...     rotated_frame=frames_view.frame.rotate(30),
+        ...     stored=True
+        ... )
+        >>>
+        >>> # Create video from transformed frames
+        >>> frames_view.group_by(videos_table).select(
+        ...     make_video(frames_view.pos, frames_view.rotated_frame)
+        ... ).show()
+        Compare multiple processed versions side-by-side:
+        >>> frames_view.group_by(videos_table).select(
+        ...     make_video(frames_view.pos, frames_view.frame),
+        ...     make_video(frames_view.pos, frames_view.rotated_frame)
+        ... ).show()
     """
     container: Optional[av.container.OutputContainer]
@@ -56,7 +109,7 @@ class make_video(pxt.Aggregator):
         if frame is None:
             return
         if self.container is None:
-            self.out_file = env.Env.get().create_tmp_path('.mp4')
+            self.out_file = TempStore.create_path(extension='.mp4')
             self.container = av.open(str(self.out_file), mode='w')
             self.stream = self.container.add_stream('h264', rate=self.fps)
             self.stream.pix_fmt = 'yuv420p'
@@ -105,16 +158,16 @@ def extract_audio(
             return None
         audio_stream = container.streams.audio[stream_idx]
         # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
-        output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
+        output_path = str(TempStore.create_path(extension=f'.{ext}'))
-        with av.open(output_filename, 'w', format=format) as output_container:
+        with av.open(output_path, 'w', format=format) as output_container:
             output_stream = output_container.add_stream(codec or default_codec)
             assert isinstance(output_stream, av.audio.stream.AudioStream)
             for packet in container.demux(audio_stream):
                 for frame in packet.decode():
                     output_container.mux(output_stream.encode(frame))  # type: ignore[arg-type]
-        return output_filename
+        return output_path
 @pxt.udf(is_method=True)

pixeltable/functions/vision.py CHANGED Viewed

@@ -14,7 +14,7 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
 import colorsys
 import hashlib
 from collections import defaultdict
-from typing import Any, Optional, Union
+from typing import Any, Optional
 import numpy as np
 import PIL.Image
@@ -352,7 +352,7 @@ def draw_bounding_boxes(
     from PIL import ImageColor, ImageDraw, ImageFont
     # set default font if not provided
-    txt_font: Union[ImageFont.ImageFont, ImageFont.FreeTypeFont] = (
+    txt_font: ImageFont.ImageFont | ImageFont.FreeTypeFont = (
         ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size or 10)
     )

pixeltable/globals.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import logging
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, NamedTuple, Optional, Union
 import pandas as pd
 from pandas.io.formats.style import Styler
@@ -27,8 +27,8 @@ if TYPE_CHECKING:
         RowData,  # list of dictionaries
         DataFrame,  # Pixeltable DataFrame
         pd.DataFrame,  # pandas DataFrame
-        'datasets.Dataset',
-        'datasets.DatasetDict',  # Huggingface datasets
+        datasets.Dataset,
+        datasets.DatasetDict,  # Huggingface datasets
     ]
@@ -51,7 +51,7 @@ def create_table(
     source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
     schema_overrides: Optional[dict[str, Any]] = None,
     on_error: Literal['abort', 'ignore'] = 'abort',
-    primary_key: Optional[Union[str, list[str]]] = None,
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
@@ -197,7 +197,7 @@ def create_table(
 def create_view(
     path: str,
-    base: Union[catalog.Table, DataFrame],
+    base: catalog.Table | DataFrame,
     *,
     additional_columns: Optional[dict[str, Any]] = None,
     is_snapshot: bool = False,
@@ -317,7 +317,7 @@ def create_view(
 def create_snapshot(
     path_str: str,
-    base: Union[catalog.Table, DataFrame],
+    base: catalog.Table | DataFrame,
     *,
     additional_columns: Optional[dict[str, Any]] = None,
     iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
@@ -396,7 +396,12 @@ def create_snapshot(
     )
-def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
+def create_replica(
+    destination: str,
+    source: str | catalog.Table,
+    bucket_name: str | None = None,
+    access: Literal['public', 'private'] = 'private',
+) -> Optional[catalog.Table]:
     """
     Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
     replica of a remote table. A given table can have at most one replica per Pixeltable instance.
@@ -405,6 +410,12 @@ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optio
         destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
             a remote URI such as `'pxt://username/mydir.my_table'`.
         source: Path to the source table, or (if the source table is a local table) a handle to the source table.
+        bucket_name: The name of the pixeltable cloud-registered bucket to use to store replica's data.
+            If no `bucket_name` is provided, the default Pixeltable storage bucket will be used.
+        access: Access control for the replica.
+            - `'public'`: Anyone can access this replica.
+            - `'private'`: Only the owner can access.
     """
     remote_dest = destination.startswith('pxt://')
     remote_source = isinstance(source, str) and source.startswith('pxt://')
@@ -414,7 +425,7 @@ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optio
     if remote_dest:
         if isinstance(source, str):
             source = get_table(source)
-        share.push_replica(destination, source)
+        share.push_replica(destination, source, bucket_name, access)
         return None
     else:
         assert isinstance(source, str)
@@ -484,7 +495,7 @@ def move(path: str, new_path: str) -> None:
 def drop_table(
-    table: Union[str, catalog.Table], force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
+    table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
 ) -> None:
     """Drop a table, view, or snapshot.
@@ -534,6 +545,57 @@ def drop_table(
     Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
+def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
+    """Get the contents of a Pixeltable directory.
+    Args:
+        dir_path: Path to the directory. Defaults to the root directory.
+        recursive: If `False`, returns only those tables and directories that are directly contained in specified
+            directory; if `True`, returns all tables and directories that are descendants of the specified directory,
+            recursively.
+    Returns:
+        A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
+    Raises:
+        Error: If the path does not exist or does not designate a directory.
+    Examples:
+        Get contents of top-level directory:
+        >>> pxt.get_dir_contents()
+        Get contents of 'dir1':
+        >>> pxt.get_dir_contents('dir1')
+    """
+    path_obj = catalog.Path.parse(dir_path, allow_empty_path=True)
+    catalog_entries = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
+    dirs: list[str] = []
+    tables: list[str] = []
+    _assemble_dir_contents(dir_path, catalog_entries, dirs, tables)
+    dirs.sort()
+    tables.sort()
+    return DirContents(dirs, tables)
+def _assemble_dir_contents(
+    dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
+) -> None:
+    for name, entry in catalog_entries.items():
+        if name.startswith('_'):
+            continue  # Skip system paths
+        path = f'{dir_path}.{name}' if len(dir_path) > 0 else name
+        if entry.dir is not None:
+            dirs.append(path)
+            if entry.dir_entries is not None:
+                _assemble_dir_contents(path, entry.dir_entries, dirs, tables)
+        else:
+            assert entry.table is not None
+            assert not entry.dir_entries
+            tables.append(path)
 def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
     """List the [`Table`][pixeltable.Table]s in a directory.
@@ -667,8 +729,8 @@ def ls(path: str = '') -> pd.DataFrame:
     This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
     including various attributes such as version and base table, as appropriate.
-    To get a programmatic list of tables and/or directories, use [list_tables()][pixeltable.list_tables] and/or
-    [list_dirs()][pixeltable.list_dirs] instead.
+    To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
+    instead.
     """
     from pixeltable.catalog import retry_loop
     from pixeltable.metadata import schema
@@ -701,7 +763,7 @@ def ls(path: str = '') -> pd.DataFrame:
                     kind = 'view'
                 else:
                     kind = 'table'
-                version = '' if kind == 'snapshot' else md['version']
+                version = '' if kind == 'snapshot' else str(md['version'])
                 if md['is_replica']:
                     kind = f'{kind}-replica'
             rows.append([name, kind, version, base])
@@ -798,7 +860,7 @@ def list_functions() -> Styler:
     return pd_df.hide(axis='index')
-def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
+def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
     """
     Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
     LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
@@ -875,3 +937,14 @@ def configure_logging(
 def array(elements: Iterable) -> exprs.Expr:
     return exprs.Expr.from_array(elements)
+class DirContents(NamedTuple):
+    """
+    Represents the contents of a Pixeltable directory.
+    """
+    dirs: list[str]
+    """List of directory paths contained in this directory."""
+    tables: list[str]
+    """List of table paths contained in this directory."""

pixeltable/io/datarows.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Any, Iterable, Optional, Union
+from typing import Any, Iterable, Optional
 import pixeltable as pxt
 import pixeltable.type_system as ts
@@ -61,7 +61,7 @@ def import_rows(
     rows: list[dict[str, Any]],
     *,
     schema_overrides: Optional[dict[str, Any]] = None,
-    primary_key: Optional[Union[str, list[str]]] = None,
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
 ) -> pxt.Table:
@@ -105,7 +105,7 @@ def import_json(
     filepath_or_url: str,
     *,
     schema_overrides: Optional[dict[str, Any]] = None,
-    primary_key: Optional[Union[str, list[str]]] = None,
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     **kwargs: Any,

pixeltable/io/fiftyone.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-from typing import Any, Iterator, Optional, Union
+from typing import Any, Iterator, Optional
 import fiftyone as fo  # type: ignore[import-untyped]
 import fiftyone.utils.data as foud  # type: ignore[import-untyped]
@@ -9,7 +9,7 @@ import puremagic
 import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable import exprs
-from pixeltable.env import Env
+from pixeltable.utils.media_store import TempStore
 class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
@@ -28,11 +28,11 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
         tbl: pxt.Table,
         image: exprs.Expr,
         image_format: str,
-        classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
-        detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
+        classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+        detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
         dataset_dir: Optional[os.PathLike] = None,
         shuffle: bool = False,
-        seed: Union[int, float, str, bytes, bytearray, None] = None,
+        seed: int | float | str | bytes | bytearray | None = None,
         max_samples: Optional[int] = None,
     ):
         super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
             assert isinstance(file, str)
         else:
             # Write the dynamically created image to a temp file
-            file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
+            file = TempStore.create_path(extension=f'.{self.__image_format}')
             img.save(file, format=self.__image_format)
         metadata = fo.ImageMetadata(
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
             mime_type=puremagic.from_file(file, mime=True),
             width=img.width,
             height=img.height,
-            filepath=file,
+            filepath=str(file),
             num_channels=len(img.getbands()),
         )

pixeltable/io/globals.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Literal, Optional
 import pixeltable as pxt
 import pixeltable.exceptions as excs
@@ -143,8 +143,8 @@ def export_images_as_fo_dataset(
     tbl: pxt.Table,
     images: exprs.Expr,
     image_format: str = 'webp',
-    classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
-    detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
+    classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+    detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
 ) -> 'fo.Dataset':
     """
     Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column

pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl