PyPI - pixeltable - Versions diffs - 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl - Mend

pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (153)

pixeltable/__init__.py +1 -1
pixeltable/_version.py +1 -0
pixeltable/catalog/catalog.py +144 -118
pixeltable/catalog/column.py +104 -115
pixeltable/catalog/globals.py +1 -2
pixeltable/catalog/insertable_table.py +44 -49
pixeltable/catalog/path.py +3 -4
pixeltable/catalog/schema_object.py +4 -4
pixeltable/catalog/table.py +139 -124
pixeltable/catalog/table_metadata.py +6 -6
pixeltable/catalog/table_version.py +315 -246
pixeltable/catalog/table_version_handle.py +4 -4
pixeltable/catalog/table_version_path.py +9 -10
pixeltable/catalog/tbl_ops.py +9 -3
pixeltable/catalog/view.py +34 -28
pixeltable/config.py +14 -10
pixeltable/dataframe.py +69 -78
pixeltable/env.py +78 -64
pixeltable/exec/aggregation_node.py +6 -6
pixeltable/exec/cache_prefetch_node.py +10 -10
pixeltable/exec/data_row_batch.py +3 -3
pixeltable/exec/exec_context.py +16 -4
pixeltable/exec/exec_node.py +5 -5
pixeltable/exec/expr_eval/evaluators.py +6 -6
pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
pixeltable/exec/expr_eval/globals.py +6 -6
pixeltable/exec/expr_eval/row_buffer.py +1 -2
pixeltable/exec/expr_eval/schedulers.py +11 -11
pixeltable/exec/in_memory_data_node.py +2 -2
pixeltable/exec/object_store_save_node.py +14 -17
pixeltable/exec/sql_node.py +28 -27
pixeltable/exprs/arithmetic_expr.py +4 -4
pixeltable/exprs/array_slice.py +2 -2
pixeltable/exprs/column_property_ref.py +3 -3
pixeltable/exprs/column_ref.py +61 -74
pixeltable/exprs/comparison.py +5 -5
pixeltable/exprs/compound_predicate.py +3 -3
pixeltable/exprs/data_row.py +12 -12
pixeltable/exprs/expr.py +41 -31
pixeltable/exprs/expr_dict.py +3 -3
pixeltable/exprs/expr_set.py +3 -3
pixeltable/exprs/function_call.py +14 -14
pixeltable/exprs/in_predicate.py +4 -4
pixeltable/exprs/inline_expr.py +8 -8
pixeltable/exprs/is_null.py +1 -3
pixeltable/exprs/json_mapper.py +8 -8
pixeltable/exprs/json_path.py +6 -6
pixeltable/exprs/literal.py +5 -5
pixeltable/exprs/method_ref.py +2 -2
pixeltable/exprs/object_ref.py +2 -2
pixeltable/exprs/row_builder.py +14 -14
pixeltable/exprs/rowid_ref.py +8 -8
pixeltable/exprs/similarity_expr.py +50 -25
pixeltable/exprs/sql_element_cache.py +4 -4
pixeltable/exprs/string_op.py +2 -2
pixeltable/exprs/type_cast.py +3 -5
pixeltable/func/aggregate_function.py +8 -8
pixeltable/func/callable_function.py +9 -9
pixeltable/func/expr_template_function.py +3 -3
pixeltable/func/function.py +15 -17
pixeltable/func/function_registry.py +6 -7
pixeltable/func/globals.py +2 -3
pixeltable/func/mcp.py +2 -2
pixeltable/func/query_template_function.py +16 -16
pixeltable/func/signature.py +14 -14
pixeltable/func/tools.py +11 -11
pixeltable/func/udf.py +16 -18
pixeltable/functions/__init__.py +1 -0
pixeltable/functions/anthropic.py +7 -7
pixeltable/functions/audio.py +76 -0
pixeltable/functions/bedrock.py +6 -6
pixeltable/functions/deepseek.py +4 -4
pixeltable/functions/fireworks.py +2 -2
pixeltable/functions/gemini.py +6 -6
pixeltable/functions/globals.py +12 -12
pixeltable/functions/groq.py +4 -4
pixeltable/functions/huggingface.py +1033 -6
pixeltable/functions/image.py +7 -10
pixeltable/functions/llama_cpp.py +7 -7
pixeltable/functions/math.py +2 -3
pixeltable/functions/mistralai.py +3 -3
pixeltable/functions/ollama.py +9 -9
pixeltable/functions/openai.py +21 -21
pixeltable/functions/openrouter.py +7 -7
pixeltable/functions/string.py +21 -28
pixeltable/functions/timestamp.py +7 -8
pixeltable/functions/together.py +4 -6
pixeltable/functions/twelvelabs.py +92 -0
pixeltable/functions/video.py +36 -31
pixeltable/functions/vision.py +6 -6
pixeltable/functions/whisper.py +7 -7
pixeltable/functions/whisperx.py +16 -16
pixeltable/globals.py +75 -40
pixeltable/index/base.py +12 -8
pixeltable/index/btree.py +19 -22
pixeltable/index/embedding_index.py +30 -39
pixeltable/io/datarows.py +3 -3
pixeltable/io/external_store.py +13 -16
pixeltable/io/fiftyone.py +5 -5
pixeltable/io/globals.py +5 -5
pixeltable/io/hf_datasets.py +4 -4
pixeltable/io/label_studio.py +12 -12
pixeltable/io/pandas.py +6 -6
pixeltable/io/parquet.py +2 -2
pixeltable/io/table_data_conduit.py +12 -12
pixeltable/io/utils.py +2 -2
pixeltable/iterators/audio.py +2 -2
pixeltable/iterators/document.py +88 -57
pixeltable/iterators/video.py +66 -37
pixeltable/metadata/converters/convert_18.py +2 -2
pixeltable/metadata/converters/convert_19.py +2 -2
pixeltable/metadata/converters/convert_20.py +2 -2
pixeltable/metadata/converters/convert_21.py +2 -2
pixeltable/metadata/converters/convert_22.py +2 -2
pixeltable/metadata/converters/convert_24.py +2 -2
pixeltable/metadata/converters/convert_25.py +2 -2
pixeltable/metadata/converters/convert_26.py +2 -2
pixeltable/metadata/converters/convert_29.py +4 -4
pixeltable/metadata/converters/convert_34.py +2 -2
pixeltable/metadata/converters/convert_36.py +2 -2
pixeltable/metadata/converters/convert_38.py +2 -2
pixeltable/metadata/converters/convert_39.py +1 -2
pixeltable/metadata/converters/util.py +11 -13
pixeltable/metadata/schema.py +22 -21
pixeltable/metadata/utils.py +2 -6
pixeltable/mypy/mypy_plugin.py +5 -5
pixeltable/plan.py +32 -34
pixeltable/share/packager.py +7 -7
pixeltable/share/publish.py +3 -3
pixeltable/store.py +126 -41
pixeltable/type_system.py +43 -46
pixeltable/utils/__init__.py +1 -2
pixeltable/utils/arrow.py +4 -4
pixeltable/utils/av.py +74 -38
pixeltable/utils/azure_store.py +305 -0
pixeltable/utils/code.py +1 -2
pixeltable/utils/dbms.py +15 -19
pixeltable/utils/description_helper.py +2 -3
pixeltable/utils/documents.py +5 -6
pixeltable/utils/exception_handler.py +2 -2
pixeltable/utils/filecache.py +5 -5
pixeltable/utils/formatter.py +4 -6
pixeltable/utils/gcs_store.py +9 -9
pixeltable/utils/local_store.py +17 -17
pixeltable/utils/object_stores.py +59 -43
pixeltable/utils/s3_store.py +35 -30
{pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
pixeltable-0.4.19.dist-info/RECORD +213 -0
pixeltable/__version__.py +0 -3
pixeltable-0.4.17.dist-info/RECORD +0 -211
{pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
{pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
{pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0

pixeltable/functions/twelvelabs.py ADDED Viewed

@@ -0,0 +1,92 @@
+"""
+Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
+that wrap various endpoints from the TwelveLabs API. In order to use them, you must
+first `pip install twelvelabs` and configure your TwelveLabs credentials, as described in
+the [Working with TwelveLabs](https://pixeltable.readme.io/docs/working-with-twelvelabs) tutorial.
+"""
+from typing import TYPE_CHECKING, Any, Literal
+import numpy as np
+import pixeltable as pxt
+from pixeltable import env
+from pixeltable.utils.code import local_public_names
+if TYPE_CHECKING:
+    from twelvelabs import AsyncTwelveLabs
+@env.register_client('twelvelabs')
+def _(api_key: str) -> 'AsyncTwelveLabs':
+    from twelvelabs import AsyncTwelveLabs
+    return AsyncTwelveLabs(api_key=api_key)
+def _twelvelabs_client() -> 'AsyncTwelveLabs':
+    return env.Env.get().get_client('twelvelabs')
+@pxt.udf(resource_pool='request-rate:twelvelabs')
+async def embed(
+    model_name: str,
+    *,
+    text: str | None = None,
+    text_truncate: Literal['none', 'start', 'end'] | None = None,
+    audio: pxt.Audio | None = None,
+    # TODO: support images
+    # image: pxt.Image | None = None,
+    **kwargs: Any,
+) -> pxt.Array[(1024,), pxt.Float]:
+    """
+    Creates an embedding vector for the given `text`, `audio`, or `image` parameter. Only one of `text`, `audio`, or
+    `image` may be specified.
+    Equivalent to the TwelveLabs Embed API.
+    https://docs.twelvelabs.io/v1.3/docs/guides/create-embeddings
+    Request throttling:
+    Applies the rate limit set in the config (section `twelvelabs`, key `rate_limit`). If no rate
+    limit is configured, uses a default of 600 RPM.
+    __Requirements:__
+    - `pip install twelvelabs`
+    Args:
+        model_name: The name of the model to use. Check
+            [the TwelveLabs documentation](https://docs.twelvelabs.io/v1.3/sdk-reference/python/create-text-image-and-audio-embeddings)
+            for available models.
+        text: The text to embed.
+        text_truncate: Truncation mode for the text.
+        audio: The audio to embed.
+    Returns:
+        The embedding.
+    Examples:
+        Add a computed column `embed` for an embedding of a string column `input`:
+        >>> tbl.add_computed_column(
+        ...     embed=embed(model_name='Marengo-retrieval-2.7', text=tbl.input)
+        ... )
+    """
+    cl = _twelvelabs_client()
+    res = await cl.embed.create(
+        model_name=model_name, text=text, text_truncate=text_truncate, audio_file=audio, **kwargs
+    )
+    if text is not None:
+        if res.text_embedding is None:
+            raise pxt.Error(f"Didn't receive embedding for text: {text}")
+        vector = res.text_embedding.segments[0].float_
+        return np.array(vector, dtype=np.float64)
+    # TODO: handle audio and image, once we know how to get a non-error response
+    return None
+__all__ = local_public_names(__name__)
+def __dir__() -> list[str]:
+    return __all__

pixeltable/functions/video.py CHANGED Viewed

@@ -20,28 +20,6 @@ from pixeltable.utils.code import local_public_names
 from pixeltable.utils.local_store import TempStore
 _logger = logging.getLogger('pixeltable')
-_format_defaults: dict[str, tuple[str, str]] = {  # format -> (codec, ext)
-    'wav': ('pcm_s16le', 'wav'),
-    'mp3': ('libmp3lame', 'mp3'),
-    'flac': ('flac', 'flac'),
-    # 'mp4': ('aac', 'm4a'),
-}
-# for mp4:
-# - extract_audio() fails with
-#   "Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1146 >= 290"
-# - chatgpt suggests this can be fixed in the following manner
-#     for packet in container.demux(audio_stream):
-#         packet.pts = None  # Reset the PTS and DTS to allow FFmpeg to set them automatically
-#         packet.dts = None
-#         for frame in packet.decode():
-#             frame.pts = None
-#             for packet in output_stream.encode(frame):
-#                 output_container.mux(packet)
-#
-#     # Flush remaining packets
-#     for packet in output_stream.encode():
-#         output_container.mux(packet)
 @pxt.uda(requires_order_by=True)
@@ -150,9 +128,9 @@ def extract_audio(
         ...     extracted_audio=tbl.video_col.extract_audio(format='flac')
         ... )
     """
-    if format not in _format_defaults:
+    if format not in av_utils.AUDIO_FORMATS:
         raise ValueError(f'extract_audio(): unsupported audio format: {format}')
-    default_codec, ext = _format_defaults[format]
+    default_codec, ext = av_utils.AUDIO_FORMATS[format]
     with av.open(video_path) as container:
         if len(container.streams.audio) <= stream_idx:
@@ -306,7 +284,14 @@ def _handle_ffmpeg_error(e: subprocess.CalledProcessError) -> NoReturn:
 @pxt.udf(is_method=True)
 def clip(
-    video: pxt.Video, *, start_time: float, end_time: float | None = None, duration: float | None = None
+    video: pxt.Video,
+    *,
+    start_time: float,
+    end_time: float | None = None,
+    duration: float | None = None,
+    mode: Literal['fast', 'accurate'] = 'accurate',
+    video_encoder: str | None = None,
+    video_encoder_args: dict[str, Any] | None = None,
 ) -> pxt.Video | None:
     """
     Extract a clip from a video, specified by `start_time` and either `end_time` or `duration` (in seconds).
@@ -323,6 +308,14 @@ def clip(
         start_time: Start time in seconds
         end_time: End time in seconds
         duration: Duration of the clip in seconds
+        mode:
+            - `'fast'`: avoids re-encoding but starts the clip at the nearest keyframes and as a result, the clip
+                duration will be slightly longer than requested
+            - `'accurate'`: extracts a frame-accurate clip, but requires re-encoding
+        video_encoder: Video encoder to use. If not specified, uses the default encoder for the current platform.
+            Only available for `mode='accurate'`.
+        video_encoder_args: Additional arguments to pass to the video encoder. Only available for `mode='accurate'`.
     Returns:
         New video containing only the specified time range or None if start_time is beyond the end of the video.
@@ -336,6 +329,11 @@ def clip(
         raise pxt.Error(f'duration must be positive, got {duration}')
     if end_time is not None and duration is not None:
         raise pxt.Error('end_time and duration cannot both be specified')
+    if mode == 'fast':
+        if video_encoder is not None:
+            raise pxt.Error("video_encoder is not supported for mode='fast'")
+        if video_encoder_args is not None:
+            raise pxt.Error("video_encoder_args is not supported for mode='fast'")
     video_duration = av_utils.get_video_duration(video)
     if video_duration is not None and start_time > video_duration:
@@ -345,7 +343,15 @@ def clip(
     if end_time is not None:
         duration = end_time - start_time
-    cmd = av_utils.ffmpeg_clip_cmd(str(video), output_path, start_time, duration)
+    cmd = av_utils.ffmpeg_clip_cmd(
+        str(video),
+        output_path,
+        start_time,
+        duration,
+        fast=(mode == 'fast'),
+        video_encoder=video_encoder,
+        video_encoder_args=video_encoder_args,
+    )
     try:
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
@@ -364,7 +370,7 @@ def segment_video(
     *,
     duration: float | None = None,
     segment_times: list[float] | None = None,
-    mode: Literal['fast', 'accurate'] = 'fast',
+    mode: Literal['fast', 'accurate'] = 'accurate',
     video_encoder: str | None = None,
     video_encoder_args: dict[str, Any] | None = None,
 ) -> list[str]:
@@ -400,15 +406,14 @@ def segment_video(
     Examples:
         Split a video at 1 minute intervals using fast mode:
-        >>> tbl.select(segment_paths=tbl.video.segment_video(duration=60)).collect()
+        >>> tbl.select(segment_paths=tbl.video.segment_video(duration=60, mode='fast')).collect()
-        Split video into exact 10-second segments with accurate mode, using the libx264 encoder with a CRF of 23 and
-        slow preset (for smaller output files):
+        Split video into exact 10-second segments with default accurate mode, using the libx264 encoder with a CRF of 23
+        and slow preset (for smaller output files):
         >>> tbl.select(
         ...     segment_paths=tbl.video.segment_video(
         ...         duration=10,
-        ...         mode='accurate',
         ...         video_encoder='libx264',
         ...         video_encoder_args={'crf': 23, 'preset': 'slow'}
         ...     )

pixeltable/functions/vision.py CHANGED Viewed

@@ -14,7 +14,7 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
 import colorsys
 import hashlib
 from collections import defaultdict
-from typing import Any, Optional
+from typing import Any
 import numpy as np
 import PIL.Image
@@ -293,13 +293,13 @@ def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
 def draw_bounding_boxes(
     img: PIL.Image.Image,
     boxes: list[list[int]],
-    labels: Optional[list[Any]] = None,
-    color: Optional[str] = None,
-    box_colors: Optional[list[str]] = None,
+    labels: list[Any] | None = None,
+    color: str | None = None,
+    box_colors: list[str] | None = None,
     fill: bool = False,
     width: int = 1,
-    font: Optional[str] = None,
-    font_size: Optional[int] = None,
+    font: str | None = None,
+    font_size: int | None = None,
 ) -> PIL.Image.Image:
     """
     Draws bounding boxes on the given image.

pixeltable/functions/whisper.py CHANGED Viewed

@@ -6,7 +6,7 @@ This UDF will cause Pixeltable to invoke the relevant model locally. In order to
 first `pip install openai-whisper`.
 """
-from typing import TYPE_CHECKING, Optional, Sequence
+from typing import TYPE_CHECKING, Sequence
 import pixeltable as pxt
 from pixeltable.env import Env
@@ -21,16 +21,16 @@ def transcribe(
     audio: pxt.Audio,
     *,
     model: str,
-    temperature: Optional[Sequence[float]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
-    compression_ratio_threshold: Optional[float] = 2.4,
-    logprob_threshold: Optional[float] = -1.0,
-    no_speech_threshold: Optional[float] = 0.6,
+    temperature: Sequence[float] | None = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
+    compression_ratio_threshold: float | None = 2.4,
+    logprob_threshold: float | None = -1.0,
+    no_speech_threshold: float | None = 0.6,
     condition_on_previous_text: bool = True,
-    initial_prompt: Optional[str] = None,
+    initial_prompt: str | None = None,
     word_timestamps: bool = False,
     prepend_punctuations: str = '"\'“¿([{-',
     append_punctuations: str = '"\'.。,，!！?？:：”)]}、',  # noqa: RUF001
-    decode_options: Optional[dict] = None,
+    decode_options: dict | None = None,
 ) -> dict:
     """
     Transcribe an audio file using Whisper.

pixeltable/functions/whisperx.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """WhisperX audio transcription and diarization functions."""
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any
 import numpy as np
@@ -21,17 +21,17 @@ def transcribe(
     *,
     model: str,
     diarize: bool = False,
-    compute_type: Optional[str] = None,
-    language: Optional[str] = None,
-    task: Optional[str] = None,
-    chunk_size: Optional[int] = None,
-    alignment_model_name: Optional[str] = None,
-    interpolate_method: Optional[str] = None,
-    return_char_alignments: Optional[bool] = None,
-    diarization_model_name: Optional[str] = None,
-    num_speakers: Optional[int] = None,
-    min_speakers: Optional[int] = None,
-    max_speakers: Optional[int] = None,
+    compute_type: str | None = None,
+    language: str | None = None,
+    task: str | None = None,
+    chunk_size: int | None = None,
+    alignment_model_name: str | None = None,
+    interpolate_method: str | None = None,
+    return_char_alignments: bool | None = None,
+    diarization_model_name: str | None = None,
+    num_speakers: int | None = None,
+    min_speakers: int | None = None,
+    max_speakers: int | None = None,
 ) -> dict:
     """
     Transcribe an audio file using WhisperX.
@@ -144,7 +144,7 @@ def _lookup_transcription_model(model: str, device: str, compute_type: str) -> '
     return _model_cache[key]
-def _lookup_alignment_model(language_code: str, device: str, model_name: Optional[str]) -> tuple['Wav2Vec2Model', dict]:
+def _lookup_alignment_model(language_code: str, device: str, model_name: str | None) -> tuple['Wav2Vec2Model', dict]:
     import whisperx
     key = (language_code, device, model_name)
@@ -154,7 +154,7 @@ def _lookup_alignment_model(language_code: str, device: str, model_name: Optiona
     return _alignment_model_cache[key]
-def _lookup_diarization_model(device: str, model_name: Optional[str]) -> 'DiarizationPipeline':
+def _lookup_diarization_model(device: str, model_name: str | None) -> 'DiarizationPipeline':
     from whisperx.diarize import DiarizationPipeline
     key = (device, model_name)
@@ -168,8 +168,8 @@ def _lookup_diarization_model(device: str, model_name: Optional[str]) -> 'Diariz
 _model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
-_alignment_model_cache: dict[tuple[str, str, Optional[str]], tuple['Wav2Vec2Model', dict]] = {}
-_diarization_model_cache: dict[tuple[str, Optional[str]], 'DiarizationPipeline'] = {}
+_alignment_model_cache: dict[tuple[str, str, str | None], tuple['Wav2Vec2Model', dict]] = {}
+_diarization_model_cache: dict[tuple[str, str | None], 'DiarizationPipeline'] = {}
 __all__ = local_public_names(__name__)

pixeltable/globals.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import logging
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Iterable, Literal, NamedTuple, Optional, Union
+from typing import TYPE_CHECKING, Any, Iterable, Literal, NamedTuple, Union
 import pandas as pd
 import pydantic
@@ -14,6 +14,7 @@ from pixeltable.catalog import Catalog, TableVersionPath
 from pixeltable.catalog.insertable_table import OnErrorParameter
 from pixeltable.config import Config
 from pixeltable.env import Env
+from pixeltable.io.table_data_conduit import DFTableDataConduit, TableDataConduit
 from pixeltable.iterators import ComponentIterator
 if TYPE_CHECKING:
@@ -36,7 +37,7 @@ if TYPE_CHECKING:
 _logger = logging.getLogger('pixeltable')
-def init(config_overrides: Optional[dict[str, Any]] = None) -> None:
+def init(config_overrides: dict[str, Any] | None = None) -> None:
     """Initializes the Pixeltable environment."""
     if config_overrides is None:
         config_overrides = {}
@@ -46,18 +47,19 @@ def init(config_overrides: Optional[dict[str, Any]] = None) -> None:
 def create_table(
     path: str,
-    schema: Optional[dict[str, Any]] = None,
+    schema: dict[str, Any] | None = None,
     *,
-    source: Optional[TableDataSource] = None,
-    source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
-    schema_overrides: Optional[dict[str, Any]] = None,
+    source: TableDataSource | None = None,
+    source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
+    schema_overrides: dict[str, Any] | None = None,
+    create_default_idxs: bool = True,
     on_error: Literal['abort', 'ignore'] = 'abort',
     primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
-    extra_args: Optional[dict[str, Any]] = None,  # Additional arguments to data source provider
+    extra_args: dict[str, Any] | None = None,  # Additional arguments to data source provider
 ) -> catalog.Table:
     """Create a new base table. Exactly one of `schema` or `source` must be provided.
@@ -77,6 +79,8 @@ def create_table(
         schema_overrides: Must be used in conjunction with a `source`.
             If specified, then columns in `schema_overrides` will be given the specified types.
             (Pixeltable will attempt to infer the types of any columns not specified.)
+        create_default_idxs: If True, creates a B-tree index on every scalar and media column that is not computed,
+            except for boolean columns.
         on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
             invalid media file (such as a corrupt image) for one of the inserted rows.
@@ -138,7 +142,7 @@ def create_table(
         >>> tbl = pxt.create_table('my_table', source='data.csv')
     """
-    from pixeltable.io.table_data_conduit import DFTableDataConduit, UnkTableDataConduit
+    from pixeltable.io.table_data_conduit import UnkTableDataConduit
     from pixeltable.io.utils import normalize_primary_key_parameter
     if (schema is None) == (source is None):
@@ -150,11 +154,16 @@ def create_table(
     path_obj = catalog.Path.parse(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
-    primary_key: Optional[list[str]] = normalize_primary_key_parameter(primary_key)
-    table: catalog.Table = None
-    tds = None
-    data_source = None
+    primary_key: list[str] | None = normalize_primary_key_parameter(primary_key)
+    data_source: TableDataConduit | None = None
     if source is not None:
+        if isinstance(source, str) and source.strip().startswith('pxt://'):
+            raise excs.Error(
+                'create_table(): Creating a table directly from a cloud URI is not supported.'
+                ' Please replicate the table locally first using `pxt.replicate()`:\n'
+                "replica_tbl = pxt.replicate('pxt://path/to/remote_table', 'local_replica_name')\n"
+                "pxt.create_table('new_table_name', source=replica_tbl)"
+            )
         tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
         tds.check_source_format()
         data_source = tds.specialize()
@@ -179,35 +188,43 @@ def create_table(
             'Unable to create a proper schema from supplied `source`. Please use appropriate `schema_overrides`.'
         )
-    table, was_created = Catalog.get().create_table(
+    tbl, was_created = Catalog.get().create_table(
         path_obj,
         schema,
-        data_source.pxt_df if isinstance(data_source, DFTableDataConduit) else None,
         if_exists=if_exists_,
         primary_key=primary_key,
         comment=comment,
         media_validation=media_validation_,
         num_retained_versions=num_retained_versions,
+        create_default_idxs=create_default_idxs,
     )
-    if was_created and data_source is not None and not is_direct_df:
+    # TODO: combine data loading with table creation into a single transaction
+    if was_created:
         fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
-        table.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
+        if isinstance(data_source, DFTableDataConduit):
+            df = data_source.pxt_df
+            with Catalog.get().begin_xact(tbl=tbl._tbl_version_path, for_write=True, lock_mutable_tree=True):
+                tbl._tbl_version.get().insert(None, df, fail_on_exception=fail_on_exception)
+        elif data_source is not None and not is_direct_df:
+            tbl.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
-    return table
+    return tbl
 def create_view(
     path: str,
     base: catalog.Table | DataFrame,
     *,
-    additional_columns: Optional[dict[str, Any]] = None,
+    additional_columns: dict[str, Any] | None = None,
     is_snapshot: bool = False,
-    iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
+    create_default_idxs: bool = False,
+    iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
-) -> Optional[catalog.Table]:
+) -> catalog.Table | None:
     """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
     Args:
@@ -220,6 +237,8 @@ def create_view(
             [`create_table`][pixeltable.create_table].
         is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
             [`create_snapshot`][pixeltable.create_snapshot].
+        create_default_idxs: Whether to create default indexes on the view's columns (the base's columns are excluded).
+            Cannot be `True` for snapshots.
         iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
             the base table.
         num_retained_versions: Number of versions of the view to retain.
@@ -267,9 +286,11 @@ def create_view(
         >>> tbl = pxt.get_table('my_table')
         ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
     """
+    if is_snapshot and create_default_idxs is True:
+        raise excs.Error('Cannot create default indexes on a snapshot')
     tbl_version_path: TableVersionPath
-    select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
-    where: Optional[exprs.Expr] = None
+    select_list: list[tuple[exprs.Expr, str | None]] | None = None
+    where: exprs.Expr | None = None
     if isinstance(base, catalog.Table):
         tbl_version_path = base._tbl_version_path
         sample_clause = None
@@ -297,7 +318,7 @@ def create_view(
             if col_name in [c.name for c in tbl_version_path.columns()]:
                 raise excs.Error(
                     f'Column {col_name!r} already exists in the base table '
-                    f'{tbl_version_path.get_column(col_name).tbl.name}.'
+                    f'{tbl_version_path.get_column(col_name).get_tbl().name}.'
                 )
     return Catalog.get().create_view(
@@ -308,6 +329,7 @@ def create_view(
         sample_clause=sample_clause,
         additional_columns=additional_columns,
         is_snapshot=is_snapshot,
+        create_default_idxs=create_default_idxs,
         iterator=iterator,
         num_retained_versions=num_retained_versions,
         comment=comment,
@@ -320,13 +342,13 @@ def create_snapshot(
     path_str: str,
     base: catalog.Table | DataFrame,
     *,
-    additional_columns: Optional[dict[str, Any]] = None,
-    iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
+    additional_columns: dict[str, Any] | None = None,
+    iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
-) -> Optional[catalog.Table]:
+) -> catalog.Table | None:
     """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
     Args:
@@ -487,12 +509,28 @@ def get_table(path: str, if_not_exists: Literal['error', 'ignore'] = 'error') ->
     return tbl
-def move(path: str, new_path: str) -> None:
+def move(
+    path: str,
+    new_path: str,
+    *,
+    if_exists: Literal['error', 'ignore'] = 'error',
+    if_not_exists: Literal['error', 'ignore'] = 'error',
+) -> None:
     """Move a schema object to a new directory and/or rename a schema object.
     Args:
         path: absolute path to the existing schema object.
         new_path: absolute new path for the schema object.
+        if_exists: Directive regarding how to handle if a schema object already exists at the new path.
+            Must be one of the following:
+            - `'error'`: raise an error
+            - `'ignore'`: do nothing and return
+        if_not_exists: Directive regarding how to handle if the source path does not exist.
+            Must be one of the following:
+            - `'error'`: raise an error
+            - `'ignore'`: do nothing and return
     Raises:
         Error: If path does not exist or new_path already exists.
@@ -506,13 +544,16 @@ def move(path: str, new_path: str) -> None:
         >>>> pxt.move('dir1.my_table', 'dir1.new_name')
     """
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+    if if_exists_ not in (catalog.IfExistsParam.ERROR, catalog.IfExistsParam.IGNORE):
+        raise excs.Error("`if_exists` must be one of 'error' or 'ignore'")
+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
     if path == new_path:
         raise excs.Error('move(): source and destination cannot be identical')
     path_obj, new_path_obj = catalog.Path.parse(path), catalog.Path.parse(new_path)
     if path_obj.is_ancestor(new_path_obj):
         raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
-    cat = Catalog.get()
-    cat.move(path_obj, new_path_obj)
+    Catalog.get().move(path_obj, new_path_obj, if_exists_, if_not_exists_)
 def drop_table(
@@ -660,8 +701,8 @@ def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths:
 def create_dir(
-    path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
-) -> Optional[catalog.Dir]:
+    path: str, *, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
+) -> catalog.Dir | None:
     """Create a directory.
     Args:
@@ -816,9 +857,7 @@ def ls(path: str = '') -> pd.DataFrame:
 def _extract_paths(
-    dir_entries: dict[str, Catalog.DirEntry],
-    parent: catalog.Path,
-    entry_type: Optional[type[catalog.SchemaObject]] = None,
+    dir_entries: dict[str, Catalog.DirEntry], parent: catalog.Path, entry_type: type[catalog.SchemaObject] | None = None
 ) -> list[catalog.Path]:
     """Convert nested dir_entries structure to a flattened list of paths."""
     matches: list[str]
@@ -928,7 +967,7 @@ def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
     return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])
-def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
+def tool(fn: func.Function, name: str | None = None, description: str | None = None) -> func.tools.Tool:
     """
     Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
     [pxt.tools()][pixeltable.tools] for more details.
@@ -949,11 +988,7 @@ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[st
 def configure_logging(
-    *,
-    to_stdout: Optional[bool] = None,
-    level: Optional[int] = None,
-    add: Optional[str] = None,
-    remove: Optional[str] = None,
+    *, to_stdout: bool | None = None, level: int | None = None, add: str | None = None, remove: str | None = None
 ) -> None:
     """Configure logging.

pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

Potentially problematic release.

pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl