pixeltable 0.4.13__py3-none-any.whl → 0.4.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (55)
  1. pixeltable/__init__.py +2 -1
  2. pixeltable/catalog/catalog.py +187 -63
  3. pixeltable/catalog/column.py +24 -20
  4. pixeltable/catalog/table.py +24 -8
  5. pixeltable/catalog/table_metadata.py +1 -0
  6. pixeltable/catalog/table_version.py +16 -34
  7. pixeltable/catalog/update_status.py +12 -0
  8. pixeltable/catalog/view.py +22 -22
  9. pixeltable/config.py +2 -0
  10. pixeltable/dataframe.py +4 -2
  11. pixeltable/env.py +46 -21
  12. pixeltable/exec/__init__.py +1 -0
  13. pixeltable/exec/aggregation_node.py +0 -1
  14. pixeltable/exec/cache_prefetch_node.py +74 -98
  15. pixeltable/exec/data_row_batch.py +2 -18
  16. pixeltable/exec/expr_eval/expr_eval_node.py +11 -0
  17. pixeltable/exec/in_memory_data_node.py +1 -1
  18. pixeltable/exec/object_store_save_node.py +299 -0
  19. pixeltable/exec/sql_node.py +28 -33
  20. pixeltable/exprs/data_row.py +31 -25
  21. pixeltable/exprs/json_path.py +6 -5
  22. pixeltable/exprs/row_builder.py +6 -12
  23. pixeltable/functions/gemini.py +1 -1
  24. pixeltable/functions/openai.py +1 -1
  25. pixeltable/functions/video.py +128 -15
  26. pixeltable/functions/whisperx.py +2 -0
  27. pixeltable/functions/yolox.py +2 -0
  28. pixeltable/globals.py +49 -30
  29. pixeltable/index/embedding_index.py +5 -8
  30. pixeltable/io/__init__.py +1 -0
  31. pixeltable/io/fiftyone.py +1 -1
  32. pixeltable/io/label_studio.py +4 -5
  33. pixeltable/iterators/__init__.py +1 -0
  34. pixeltable/iterators/audio.py +1 -1
  35. pixeltable/iterators/document.py +10 -12
  36. pixeltable/iterators/video.py +1 -1
  37. pixeltable/metadata/schema.py +7 -0
  38. pixeltable/plan.py +26 -1
  39. pixeltable/share/packager.py +8 -2
  40. pixeltable/share/publish.py +3 -10
  41. pixeltable/store.py +1 -1
  42. pixeltable/type_system.py +1 -3
  43. pixeltable/utils/dbms.py +31 -5
  44. pixeltable/utils/gcs_store.py +283 -0
  45. pixeltable/utils/local_store.py +316 -0
  46. pixeltable/utils/object_stores.py +497 -0
  47. pixeltable/utils/pytorch.py +5 -6
  48. pixeltable/utils/s3_store.py +354 -0
  49. {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/METADATA +1 -1
  50. {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/RECORD +53 -50
  51. pixeltable/utils/media_store.py +0 -248
  52. pixeltable/utils/s3.py +0 -17
  53. {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/WHEEL +0 -0
  54. {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/entry_points.txt +0 -0
  55. {pixeltable-0.4.13.dist-info → pixeltable-0.4.15.dist-info}/licenses/LICENSE +0 -0
pixeltable/exec/sql_node.py CHANGED
@@ -71,6 +71,13 @@ class SqlNode(ExecNode):
     If set_pk is True, they are added to the end of the result set when creating the SQL statement
     so they can always be referenced as cols[-num_pk_cols:] in the result set.
     The pk_columns consist of the rowid columns of the target table followed by the version number.
+
+    If row_builder contains references to unstored iter columns, expands the select list to include their
+    SQL-materializable subexpressions.
+
+    Args:
+        select_list: output of the query
+        set_pk: if True, sets the primary key for each DataRow
     """

     tbl: Optional[catalog.TableVersionPath]
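
For orientation, the pk layout described in this docstring can be sketched standalone; the row contents and num_pk_cols value below are invented for illustration:

    # Data columns come first; rowid columns and the version number are appended,
    # so the pk is always the trailing slice of the result row.
    row = ('img_0001.jpg', 0.87, 42, 7, 3)  # 2 data cols, 2 rowid cols, 1 version
    num_pk_cols = 3
    data_vals = row[:-num_pk_cols]    # ('img_0001.jpg', 0.87)
    pk = row[-num_pk_cols:]           # (42, 7, 3)
    rowid, version = pk[:-1], pk[-1]  # ((42, 7), 3)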
@@ -97,14 +104,6 @@ class SqlNode(ExecNode):
         sql_elements: exprs.SqlElementCache,
         set_pk: bool = False,
     ):
-        """
-        If row_builder contains references to unstored iter columns, expands the select list to include their
-        SQL-materializable subexpressions.
-
-        Args:
-            select_list: output of the query
-            set_pk: if True, sets the primary key for each DataRow
-        """
         # create Select stmt
         self.sql_elements = sql_elements
         self.tbl = tbl
@@ -374,6 +373,11 @@ class SqlScanNode(SqlNode):
     Materializes data from the store via a Select stmt.

     Supports filtering and ordering.
+
+    Args:
+        select_list: output of the query
+        set_pk: if True, sets the primary key for each DataRow
+        exact_version_only: tables for which we only want to see rows created at the current version
     """

     exact_version_only: list[catalog.TableVersionHandle]
@@ -386,12 +390,6 @@
         set_pk: bool = False,
         exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ):
-        """
-        Args:
-            select_list: output of the query
-            set_pk: if True, sets the primary key for each DataRow
-            exact_version_only: tables for which we only want to see rows created at the current version
-        """
         sql_elements = exprs.SqlElementCache()
         super().__init__(tbl, row_builder, select_list, sql_elements, set_pk=set_pk)
         # create Select stmt
@@ -413,6 +411,11 @@
 class SqlLookupNode(SqlNode):
     """
     Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
+
+    Args:
+        select_list: output of the query
+        sa_key_cols: list of key columns in the store table
+        key_vals: list of key values to look up
     """

     def __init__(
@@ -423,12 +426,6 @@ class SqlLookupNode(SqlNode):
         sa_key_cols: list[sql.Column],
         key_vals: list[tuple],
     ):
-        """
-        Args:
-            select_list: output of the query
-            sa_key_cols: list of key columns in the store table
-            key_vals: list of key values to look up
-        """
         sql_elements = exprs.SqlElementCache()
         super().__init__(tbl, row_builder, select_list, sql_elements, set_pk=True)
         # Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
@@ -444,6 +441,11 @@
 class SqlAggregationNode(SqlNode):
     """
     Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
+
+    Args:
+        select_list: can contain calls to AggregateFunctions
+        group_by_items: list of expressions to group by
+        limit: max number of rows to return: None = no limit
     """

     group_by_items: Optional[list[exprs.Expr]]
@@ -458,12 +460,6 @@
         limit: Optional[int] = None,
         exact_version_only: Optional[list[catalog.TableVersion]] = None,
     ):
-        """
-        Args:
-            select_list: can contain calls to AggregateFunctions
-            group_by_items: list of expressions to group by
-            limit: max number of rows to return: None = no limit
-        """
         self.input_cte, input_col_map = input.to_cte()
         sql_elements = exprs.SqlElementCache(input_col_map)
         super().__init__(None, row_builder, select_list, sql_elements)
@@ -529,6 +525,12 @@ class SqlJoinNode(SqlNode):
 class SqlSampleNode(SqlNode):
     """
     Returns rows sampled from the input node.
+
+    Args:
+        input: SqlNode to sample from
+        select_list: can contain calls to AggregateFunctions
+        sample_clause: specifies the sampling method
+        stratify_exprs: Analyzer processed list of expressions to stratify by.
     """

     input_cte: Optional[sql.CTE]
@@ -544,13 +546,6 @@
         sample_clause: 'SampleClause',
         stratify_exprs: list[exprs.Expr],
     ):
-        """
-        Args:
-            input: SqlNode to sample from
-            select_list: can contain calls to AggregateFunctions
-            sample_clause: specifies the sampling method
-            stratify_exprs: Analyzer processed list of expressions to stratify by.
-        """
         assert isinstance(input, SqlNode)
         self.input_cte, input_col_map = input.to_cte(keep_pk=True)
         self.pk_count = input.num_pk_cols
pixeltable/exprs/data_row.py CHANGED
@@ -14,7 +14,7 @@ import PIL.Image
 import sqlalchemy as sql

 from pixeltable import catalog, env
-from pixeltable.utils.media_store import MediaStore, TempStore
+from pixeltable.utils.local_store import TempStore


 class DataRow:
@@ -257,42 +257,48 @@ class DataRow:
         self.vals[idx] = val
         self.has_val[idx] = True

-    def flush_img(self, index: int, col: Optional[catalog.Column] = None) -> None:
-        """Save or discard the in-memory value (required to be a PIL.Image.Image)"""
+    def prepare_col_val_for_save(self, index: int, col: Optional[catalog.Column] = None) -> bool:
+        """
+        Prepare to save a column's value into the appropriate store. Discard unneeded values.
+
+        Return:
+            True if the media object in the column needs to be saved.
+        """
         if self.vals[index] is None:
-            return
+            return False
+
+        if self.file_urls[index] is not None:
+            return False
+
         assert self.excs[index] is None
         if self.file_paths[index] is None:
             if col is not None:
-                image = self.vals[index]
-                format = None
-                if isinstance(image, PIL.Image.Image):
-                    # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
-                    # In that case, use WebP instead.
-                    format = 'webp' if image.has_transparency_data else 'jpeg'
-                filepath, url = MediaStore.get().save_media_object(image, col, format=format)
-                self.file_paths[index] = str(filepath)
-                self.file_urls[index] = url
+                # This is a media object that needs to be saved
+                return True
             else:
-                # we discard the content of this cell
+                # This is a media object that we don't care about, so we discard it
                 self.has_val[index] = False
         else:
             # we already have a file for this image, nothing left to do
             pass
+
         self.vals[index] = None
+        return False

-    def move_tmp_media_file(self, index: int, col: catalog.Column) -> None:
-        """If a media url refers to data in a temporary file, move the data to a MediaStore"""
-        if self.file_urls[index] is None:
-            return
-        assert self.excs[index] is None
+    def save_media_to_temp(self, index: int, col: catalog.Column) -> str:
+        """Save the media object in the column to the TempStore.
+        Objects cannot be saved directly to general destinations."""
         assert col.col_type.is_media_type()
-        src_path = TempStore.resolve_url(self.file_urls[index])
-        if src_path is None:
-            # The media url does not point to a temporary file, leave it as is
-            return
-        new_file_url = MediaStore.get().relocate_local_media_file(src_path, col)
-        self.file_urls[index] = new_file_url
+        val = self.vals[index]
+        format = None
+        if isinstance(val, PIL.Image.Image):
+            # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
+            # In that case, use WebP instead.
+            format = 'webp' if val.has_transparency_data else 'jpeg'
+        filepath, url = TempStore.save_media_object(val, col, format=format)
+        self.file_paths[index] = str(filepath) if filepath is not None else None
+        self.vals[index] = None
+        return url

     @property
     def rowid(self) -> tuple[int, ...]:
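
The two new methods split media persistence into a decision step (prepare_col_val_for_save) and a local materialization step (save_media_to_temp), leaving the final upload to a downstream stage such as the new object_store_save_node. A hedged sketch of the intended call sequence; row, slot_idx, col, and upload_to_object_store are illustrative stand-ins, not names from the package:

    if row.prepare_col_val_for_save(slot_idx, col):
        # The in-memory media value must be persisted; per the docstring it can
        # only be written to the TempStore first, not directly to a destination.
        tmp_url = row.save_media_to_temp(slot_idx, col)
        # A later stage would relocate the temp file to its final object store
        # and record the resulting URL (upload_to_object_store is hypothetical).
        row.file_urls[slot_idx] = upload_to_object_store(tmp_url, col)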
pixeltable/exprs/json_path.py CHANGED
@@ -17,14 +17,15 @@ from .sql_element_cache import SqlElementCache


 class JsonPath(Expr):
+    """
+    anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
+    scope_idx: for relative paths, index of referenced JsonMapper
+        (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
+    """
+
     def __init__(
         self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
     ) -> None:
-        """
-        anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
-        scope_idx: for relative paths, index of referenced JsonMapper
-        (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
-        """
         if path_elements is None:
             path_elements = []
         super().__init__(ts.JsonType(nullable=True))  # JsonPath expressions are always nullable
pixeltable/exprs/row_builder.py CHANGED
@@ -48,6 +48,12 @@ class RowBuilder:

     For ColumnRefs to unstored iterator columns:
     - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
+
+    Args:
+        output_exprs: list of Exprs to be evaluated
+        columns: list of columns to be materialized
+        input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
+    TODO: enforce that output_exprs doesn't overlap with input_exprs?
     """

     unique_exprs: ExprSet
@@ -105,13 +111,6 @@
         input_exprs: Iterable[Expr],
         tbl: Optional[catalog.TableVersion] = None,
     ):
-        """
-        Args:
-            output_exprs: list of Exprs to be evaluated
-            columns: list of columns to be materialized
-            input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
-        TODO: enforce that output_exprs doesn't overlap with input_exprs?
-        """
         self.unique_exprs: ExprSet[Expr] = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
         self.stored_img_cols = []
@@ -474,11 +473,6 @@
                 # exceptions get stored in the errortype/-msg properties of the cellmd column
                 table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
             else:
-                if col.col_type.is_media_type():
-                    if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
-                        # we have yet to store this image
-                        data_row.flush_img(slot_idx, col)
-                    data_row.move_tmp_media_file(slot_idx, col)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
pixeltable/functions/gemini.py CHANGED
@@ -15,7 +15,7 @@ import PIL.Image
 import pixeltable as pxt
 from pixeltable import env, exceptions as excs, exprs
 from pixeltable.utils.code import local_public_names
-from pixeltable.utils.media_store import TempStore
+from pixeltable.utils.local_store import TempStore

 if TYPE_CHECKING:
     from google import genai
pixeltable/functions/openai.py CHANGED
@@ -23,7 +23,7 @@ import pixeltable as pxt
 from pixeltable import env, exprs, type_system as ts
 from pixeltable.func import Batch, Tools
 from pixeltable.utils.code import local_public_names
-from pixeltable.utils.media_store import TempStore
+from pixeltable.utils.local_store import TempStore

 if TYPE_CHECKING:
     import openai
pixeltable/functions/video.py CHANGED
@@ -4,7 +4,6 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)

 import logging
 import pathlib
-import shutil
 import subprocess
 from typing import Literal, NoReturn

@@ -17,7 +16,7 @@ import pixeltable as pxt
 import pixeltable.utils.av as av_utils
 from pixeltable.env import Env
 from pixeltable.utils.code import local_public_names
-from pixeltable.utils.media_store import TempStore
+from pixeltable.utils.local_store import TempStore

 _logger = logging.getLogger('pixeltable')
 _format_defaults: dict[str, tuple[str, str]] = {  # format -> (codec, ext)
@@ -49,6 +48,10 @@ class make_video(pxt.Aggregator):
     """
     Aggregator that creates a video from a sequence of images, using the default video encoder and yuv420p pixel format.

+    Follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video
+
+    TODO: provide parameters for video_encoder and pix_fmt
+
     Args:
         fps: Frames per second for the output video.

@@ -98,11 +101,6 @@
     fps: int

     def __init__(self, fps: int = 25):
-        """
-        Follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video
-
-        TODO: provide parameters for video_encoder and pix_fmt
-        """
         self.container = None
         self.stream = None
         self.fps = fps
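
The cookbook pattern that make_video follows can be sketched on its own. A minimal PyAV example (not the aggregator's actual code; the h264/yuv420p choices mirror the defaults named in the docstring):

    import av
    import numpy as np

    container = av.open('out.mp4', mode='w')
    stream = container.add_stream('h264', rate=25)  # rate = fps
    stream.width, stream.height = 640, 480
    stream.pix_fmt = 'yuv420p'

    for _ in range(100):
        img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
        frame = av.VideoFrame.from_ndarray(img, format='rgb24')
        for packet in stream.encode(frame):
            container.mux(packet)

    for packet in stream.encode():  # flush the encoder
        container.mux(packet)
    container.close()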
@@ -328,6 +326,7 @@ def clip(
     Returns:
         New video containing only the specified time range or None if start_time is beyond the end of the video.
     """
+    Env.get().require_binary('ffmpeg')
     if start_time < 0:
         raise pxt.Error(f'start_time must be non-negative, got {start_time}')
     if end_time is not None and end_time <= start_time:
@@ -336,8 +335,6 @@ def clip(
         raise pxt.Error(f'duration must be positive, got {duration}')
     if end_time is not None and duration is not None:
         raise pxt.Error('end_time and duration cannot both be specified')
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use get_clip().')

     video_duration = av_utils.get_video_duration(video)
     if video_duration is not None and start_time > video_duration:
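
Throughout this file, the per-UDF shutil.which() checks are replaced by Env.get().require_binary('ffmpeg'). The helper's implementation is not part of this diff; a plausible sketch of the behavior it centralizes, assuming it raises the same kind of error the old checks did:

    import shutil

    import pixeltable as pxt

    class Env:
        # (existing Env machinery elided; this method is an assumption, not the
        # package's actual implementation)
        def require_binary(self, name: str) -> None:
            # Centralize the PATH check each UDF previously duplicated.
            if shutil.which(name) is None:
                raise pxt.Error(f'{name} is not installed or not in PATH. Please install {name}.')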
@@ -389,10 +386,9 @@ def segment_video(video: pxt.Video, *, duration: float) -> list[str]:
         >>> duration = tbl.video.get_duration()
         >>> tbl.select(segment_paths=tbl.video.segment_video(duration=duration / 2 + 1)).collect()
     """
+    Env.get().require_binary('ffmpeg')
     if duration <= 0:
         raise pxt.Error(f'duration must be positive, got {duration}')
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use segment_video().')

     base_path = TempStore.create_path(extension='')

@@ -437,10 +433,9 @@ def concat_videos(videos: list[pxt.Video]) -> pxt.Video:
     Returns:
         A new video containing the merged videos.
     """
+    Env.get().require_binary('ffmpeg')
     if len(videos) == 0:
         raise pxt.Error('concat_videos(): empty argument list')
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use concat_videos().')

     # Check that all videos have the same resolution
     resolutions: list[tuple[int, int]] = []
@@ -529,6 +524,125 @@ def concat_videos(videos: list[pxt.Video]) -> pxt.Video:
     filelist_path.unlink()


+@pxt.udf
+def with_audio(
+    video: pxt.Video,
+    audio: pxt.Audio,
+    *,
+    video_start_time: float = 0.0,
+    video_duration: float | None = None,
+    audio_start_time: float = 0.0,
+    audio_duration: float | None = None,
+) -> pxt.Video:
+    """
+    Creates a new video that combines the video stream from `video` and the audio stream from `audio`.
+    The `start_time` and `duration` parameters can be used to select a specific time range from each input.
+    If the audio input (or selected time range) is longer than the video, the audio will be truncated.
+
+    __Requirements:__
+
+    - `ffmpeg` needs to be installed and in PATH
+
+    Args:
+        video: Input video.
+        audio: Input audio.
+        video_start_time: Start time in the video input (in seconds).
+        video_duration: Duration of video segment (in seconds). If None, uses the remainder of the video after
+            `video_start_time`. `video_duration` determines the duration of the output video.
+        audio_start_time: Start time in the audio input (in seconds).
+        audio_duration: Duration of audio segment (in seconds). If None, uses the remainder of the audio after
+            `audio_start_time`. If the audio is longer than the output video, it will be truncated.
+
+    Returns:
+        A new video file with the audio track added.
+
+    Examples:
+        Add background music to a video:
+
+        >>> tbl.select(tbl.video.with_audio(tbl.music_track)).collect()
+
+        Add audio starting 5 seconds into both files:
+
+        >>> tbl.select(
+        ...     tbl.video.with_audio(
+        ...         tbl.music_track,
+        ...         video_start_time=5.0,
+        ...         audio_start_time=5.0
+        ...     )
+        ... ).collect()
+
+        Use a 10-second clip from the middle of both files:
+
+        >>> tbl.select(
+        ...     tbl.video.with_audio(
+        ...         tbl.music_track,
+        ...         video_start_time=30.0,
+        ...         video_duration=10.0,
+        ...         audio_start_time=15.0,
+        ...         audio_duration=10.0
+        ...     )
+        ... ).collect()
+    """
+    Env.get().require_binary('ffmpeg')
+    if video_start_time < 0:
+        raise pxt.Error(f'video_start_time must be non-negative, got {video_start_time}')
+    if audio_start_time < 0:
+        raise pxt.Error(f'audio_start_time must be non-negative, got {audio_start_time}')
+    if video_duration is not None and video_duration <= 0:
+        raise pxt.Error(f'video_duration must be positive, got {video_duration}')
+    if audio_duration is not None and audio_duration <= 0:
+        raise pxt.Error(f'audio_duration must be positive, got {audio_duration}')
+
+    output_path = str(TempStore.create_path(extension='.mp4'))
+
+    cmd = ['ffmpeg']
+    if video_start_time > 0:
+        # fast seek, must precede -i
+        cmd.extend(['-ss', str(video_start_time)])
+    if video_duration is not None:
+        cmd.extend(['-t', str(video_duration)])
+    else:
+        video_duration = av_utils.get_video_duration(video)
+    cmd.extend(['-i', str(video)])
+
+    if audio_start_time > 0:
+        cmd.extend(['-ss', str(audio_start_time)])
+    if audio_duration is not None:
+        cmd.extend(['-t', str(audio_duration)])
+    cmd.extend(['-i', str(audio)])
+
+    cmd.extend(
+        [
+            '-map',
+            '0:v:0',  # video from first input
+            '-map',
+            '1:a:0',  # audio from second input
+            '-c:v',
+            'copy',  # avoid re-encoding
+            '-c:a',
+            'copy',  # avoid re-encoding
+            '-t',
+            str(video_duration),  # limit output duration to video duration
+            '-loglevel',
+            'error',  # only show errors
+            output_path,
+        ]
+    )
+
+    _logger.debug(f'with_audio(): {" ".join(cmd)}')
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        output_file = pathlib.Path(output_path)
+        if not output_file.exists() or output_file.stat().st_size == 0:
+            stderr_output = result.stderr.strip() if result.stderr is not None else ''
+            raise pxt.Error(f'ffmpeg failed to create output file for commandline: {" ".join(cmd)}\n{stderr_output}')
+        return output_path
+    except subprocess.CalledProcessError as e:
+        _handle_ffmpeg_error(e)
+
+
 @pxt.udf(is_method=True)
 def overlay_text(
     video: pxt.Video,
@@ -615,8 +729,7 @@ def overlay_text(
         ...     )
         ... ).collect()
     """
-    if not shutil.which('ffmpeg'):
-        raise pxt.Error('ffmpeg is not installed or not in PATH. Please install ffmpeg to use overlay_text().')
+    Env.get().require_binary('ffmpeg')
     if font_size <= 0:
         raise pxt.Error(f'font_size must be positive, got {font_size}')
     if opacity < 0.0 or opacity > 1.0:
pixeltable/functions/whisperx.py CHANGED
@@ -1,3 +1,5 @@
+"""WhisperX audio transcription and diarization functions."""
+
 from typing import TYPE_CHECKING, Any, Optional

 import numpy as np
pixeltable/functions/yolox.py CHANGED
@@ -1,3 +1,5 @@
+"""YOLOX object detection functions."""
+
 import logging
 from typing import TYPE_CHECKING

pixeltable/globals.py CHANGED
@@ -397,40 +397,54 @@ def create_snapshot(
     )


-def create_replica(
-    destination: str,
+def publish(
     source: str | catalog.Table,
+    destination_uri: str,
     bucket_name: str | None = None,
     access: Literal['public', 'private'] = 'private',
-) -> Optional[catalog.Table]:
+) -> None:
     """
-    Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
-    replica of a remote table. A given table can have at most one replica per Pixeltable instance.
+    Publishes a replica of a local Pixeltable table to Pixeltable cloud. A given table can be published to at most one
+    URI per Pixeltable cloud database.

     Args:
-        destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
-            a remote URI such as `'pxt://username/mydir.my_table'`.
-        source: Path to the source table, or (if the source table is a local table) a handle to the source table.
-        bucket_name: The name of the pixeltable cloud-registered bucket to use to store replica's data.
-            If no `bucket_name` is provided, the default Pixeltable storage bucket will be used.
+        source: Path or table handle of the local table to be published.
+        destination_uri: Remote URI where the replica will be published, such as `'pxt://org_name/my_dir/my_table'`.
+        bucket_name: The name of the bucket to use to store the replica's data. The bucket must be registered with
+            Pixeltable cloud. If no `bucket_name` is provided, the default storage bucket for the destination
+            database will be used.
         access: Access control for the replica.

             - `'public'`: Anyone can access this replica.
-            - `'private'`: Only the owner can access.
+            - `'private'`: Only the host organization can access.
     """
-    remote_dest = destination.startswith('pxt://')
-    remote_source = isinstance(source, str) and source.startswith('pxt://')
-    if remote_dest == remote_source:
-        raise excs.Error('Exactly one of `destination` or `source` must be a remote URI.')
-
-    if remote_dest:
-        if isinstance(source, str):
-            source = get_table(source)
-        share.push_replica(destination, source, bucket_name, access)
-        return None
-    else:
-        assert isinstance(source, str)
-        return share.pull_replica(destination, source)
+    if not destination_uri.startswith('pxt://'):
+        raise excs.Error("`destination_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+    if isinstance(source, str):
+        source = get_table(source)
+
+    share.push_replica(destination_uri, source, bucket_name, access)
+
+
+def replicate(remote_uri: str, local_path: str) -> catalog.Table:
+    """
+    Retrieve a replica from Pixeltable cloud as a local table. This will create a full local copy of the replica in a
+    way that preserves the table structure of the original source data. Once replicated, the local table can be
+    queried offline just as any other Pixeltable table.
+
+    Args:
+        remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'`.
+        local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
+            the same or different from the cloud table name.
+
+    Returns:
+        A handle to the newly created local replica table.
+    """
+    if not remote_uri.startswith('pxt://'):
+        raise excs.Error("`remote_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+    return share.pull_replica(local_path, remote_uri)


 def get_table(path: str) -> catalog.Table:
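
A usage sketch of the new publish/replicate pair, based only on the signatures above; the table paths and org name are invented, and this assumes both functions are re-exported at the package level like the other globals:

    import pixeltable as pxt

    # Publish a local table to Pixeltable cloud.
    films = pxt.get_table('my_dir.films')
    pxt.publish(films, 'pxt://my_org/demos/films', access='public')

    # Elsewhere: pull the published replica down as a queryable local table.
    local_copy = pxt.replicate('pxt://my_org/demos/films', 'my_dir.films_replica')
    print(local_copy.collect())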
@@ -498,10 +512,11 @@ def move(path: str, new_path: str) -> None:
 def drop_table(
     table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
 ) -> None:
-    """Drop a table, view, or snapshot.
+    """Drop a table, view, snapshot, or replica.

     Args:
-        table: Fully qualified name, or handle, of the table to be dropped.
+        table: Fully qualified name or table handle of the table to be dropped; or a remote URI of a cloud replica to
+            be deleted.
         force: If `True`, will also drop all views and sub-views of this table.
         if_not_exists: Directive regarding how to handle if the path does not exist.
             Must be one of the following:
@@ -541,13 +556,17 @@ def drop_table(
         assert isinstance(table, str)
         tbl_path = table

+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+
     if tbl_path.startswith('pxt://'):
         # Remote table
+        if force:
+            raise excs.Error('Cannot use `force=True` with a cloud replica URI.')
+        # TODO: Handle if_not_exists properly
         share.delete_replica(tbl_path)
     else:
         # Local table
         path_obj = catalog.Path.parse(tbl_path)
-        if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
         Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)


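With this change, a cloud replica is dropped by passing its remote URI, and force=True is rejected for remote URIs. A small example based on the code above (paths and URI invented):

    import pixeltable as pxt

    # Local table: force=True also drops dependent views.
    pxt.drop_table('my_dir.films', force=True)

    # Cloud replica: identified by its URI; force=True here would raise.
    pxt.drop_table('pxt://my_org/demos/films')
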
@@ -763,15 +782,15 @@ def ls(path: str = '') -> pd.DataFrame:
         base = md['base'] or ''
         if base.startswith('_'):
             base = '<anonymous base table>'
-        if md['is_snapshot']:
+        if md['is_replica']:
+            kind = 'replica'
+        elif md['is_snapshot']:
             kind = 'snapshot'
         elif md['is_view']:
             kind = 'view'
         else:
             kind = 'table'
         version = '' if kind == 'snapshot' else str(md['version'])
-        if md['is_replica']:
-            kind = f'{kind}-replica'
         rows.append([name, kind, version, base])
     return rows