PyPI - pixeltable - Versions diffs - 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl - Mend

pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (245)

pixeltable/__init__.py +83 -19
pixeltable/_query.py +1444 -0
pixeltable/_version.py +1 -0
pixeltable/catalog/__init__.py +7 -4
pixeltable/catalog/catalog.py +2394 -119
pixeltable/catalog/column.py +225 -104
pixeltable/catalog/dir.py +38 -9
pixeltable/catalog/globals.py +53 -34
pixeltable/catalog/insertable_table.py +265 -115
pixeltable/catalog/path.py +80 -17
pixeltable/catalog/schema_object.py +28 -43
pixeltable/catalog/table.py +1270 -677
pixeltable/catalog/table_metadata.py +103 -0
pixeltable/catalog/table_version.py +1270 -751
pixeltable/catalog/table_version_handle.py +109 -0
pixeltable/catalog/table_version_path.py +137 -42
pixeltable/catalog/tbl_ops.py +53 -0
pixeltable/catalog/update_status.py +191 -0
pixeltable/catalog/view.py +251 -134
pixeltable/config.py +215 -0
pixeltable/env.py +736 -285
pixeltable/exceptions.py +26 -2
pixeltable/exec/__init__.py +7 -2
pixeltable/exec/aggregation_node.py +39 -21
pixeltable/exec/cache_prefetch_node.py +87 -109
pixeltable/exec/cell_materialization_node.py +268 -0
pixeltable/exec/cell_reconstruction_node.py +168 -0
pixeltable/exec/component_iteration_node.py +25 -28
pixeltable/exec/data_row_batch.py +11 -46
pixeltable/exec/exec_context.py +26 -11
pixeltable/exec/exec_node.py +35 -27
pixeltable/exec/expr_eval/__init__.py +3 -0
pixeltable/exec/expr_eval/evaluators.py +365 -0
pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
pixeltable/exec/expr_eval/globals.py +200 -0
pixeltable/exec/expr_eval/row_buffer.py +74 -0
pixeltable/exec/expr_eval/schedulers.py +413 -0
pixeltable/exec/globals.py +35 -0
pixeltable/exec/in_memory_data_node.py +35 -27
pixeltable/exec/object_store_save_node.py +293 -0
pixeltable/exec/row_update_node.py +44 -29
pixeltable/exec/sql_node.py +414 -115
pixeltable/exprs/__init__.py +8 -5
pixeltable/exprs/arithmetic_expr.py +79 -45
pixeltable/exprs/array_slice.py +5 -5
pixeltable/exprs/column_property_ref.py +40 -26
pixeltable/exprs/column_ref.py +254 -61
pixeltable/exprs/comparison.py +14 -9
pixeltable/exprs/compound_predicate.py +9 -10
pixeltable/exprs/data_row.py +213 -72
pixeltable/exprs/expr.py +270 -104
pixeltable/exprs/expr_dict.py +6 -5
pixeltable/exprs/expr_set.py +20 -11
pixeltable/exprs/function_call.py +383 -284
pixeltable/exprs/globals.py +18 -5
pixeltable/exprs/in_predicate.py +7 -7
pixeltable/exprs/inline_expr.py +37 -37
pixeltable/exprs/is_null.py +8 -4
pixeltable/exprs/json_mapper.py +120 -54
pixeltable/exprs/json_path.py +90 -60
pixeltable/exprs/literal.py +61 -16
pixeltable/exprs/method_ref.py +7 -6
pixeltable/exprs/object_ref.py +19 -8
pixeltable/exprs/row_builder.py +238 -75
pixeltable/exprs/rowid_ref.py +53 -15
pixeltable/exprs/similarity_expr.py +65 -50
pixeltable/exprs/sql_element_cache.py +5 -5
pixeltable/exprs/string_op.py +107 -0
pixeltable/exprs/type_cast.py +25 -13
pixeltable/exprs/variable.py +2 -2
pixeltable/func/__init__.py +9 -5
pixeltable/func/aggregate_function.py +197 -92
pixeltable/func/callable_function.py +119 -35
pixeltable/func/expr_template_function.py +101 -48
pixeltable/func/function.py +375 -62
pixeltable/func/function_registry.py +20 -19
pixeltable/func/globals.py +6 -5
pixeltable/func/mcp.py +74 -0
pixeltable/func/query_template_function.py +151 -35
pixeltable/func/signature.py +178 -49
pixeltable/func/tools.py +164 -0
pixeltable/func/udf.py +176 -53
pixeltable/functions/__init__.py +44 -4
pixeltable/functions/anthropic.py +226 -47
pixeltable/functions/audio.py +148 -11
pixeltable/functions/bedrock.py +137 -0
pixeltable/functions/date.py +188 -0
pixeltable/functions/deepseek.py +113 -0
pixeltable/functions/document.py +81 -0
pixeltable/functions/fal.py +76 -0
pixeltable/functions/fireworks.py +72 -20
pixeltable/functions/gemini.py +249 -0
pixeltable/functions/globals.py +208 -53
pixeltable/functions/groq.py +108 -0
pixeltable/functions/huggingface.py +1088 -95
pixeltable/functions/image.py +155 -84
pixeltable/functions/json.py +8 -11
pixeltable/functions/llama_cpp.py +31 -19
pixeltable/functions/math.py +169 -0
pixeltable/functions/mistralai.py +50 -75
pixeltable/functions/net.py +70 -0
pixeltable/functions/ollama.py +29 -36
pixeltable/functions/openai.py +548 -160
pixeltable/functions/openrouter.py +143 -0
pixeltable/functions/replicate.py +15 -14
pixeltable/functions/reve.py +250 -0
pixeltable/functions/string.py +310 -85
pixeltable/functions/timestamp.py +37 -19
pixeltable/functions/together.py +77 -120
pixeltable/functions/twelvelabs.py +188 -0
pixeltable/functions/util.py +7 -2
pixeltable/functions/uuid.py +30 -0
pixeltable/functions/video.py +1528 -117
pixeltable/functions/vision.py +26 -26
pixeltable/functions/voyageai.py +289 -0
pixeltable/functions/whisper.py +19 -10
pixeltable/functions/whisperx.py +179 -0
pixeltable/functions/yolox.py +112 -0
pixeltable/globals.py +716 -236
pixeltable/index/__init__.py +3 -1
pixeltable/index/base.py +17 -21
pixeltable/index/btree.py +32 -22
pixeltable/index/embedding_index.py +155 -92
pixeltable/io/__init__.py +12 -7
pixeltable/io/datarows.py +140 -0
pixeltable/io/external_store.py +83 -125
pixeltable/io/fiftyone.py +24 -33
pixeltable/io/globals.py +47 -182
pixeltable/io/hf_datasets.py +96 -127
pixeltable/io/label_studio.py +171 -156
pixeltable/io/lancedb.py +3 -0
pixeltable/io/pandas.py +136 -115
pixeltable/io/parquet.py +40 -153
pixeltable/io/table_data_conduit.py +702 -0
pixeltable/io/utils.py +100 -0
pixeltable/iterators/__init__.py +8 -4
pixeltable/iterators/audio.py +207 -0
pixeltable/iterators/base.py +9 -3
pixeltable/iterators/document.py +144 -87
pixeltable/iterators/image.py +17 -38
pixeltable/iterators/string.py +15 -12
pixeltable/iterators/video.py +523 -127
pixeltable/metadata/__init__.py +33 -8
pixeltable/metadata/converters/convert_10.py +2 -3
pixeltable/metadata/converters/convert_13.py +2 -2
pixeltable/metadata/converters/convert_15.py +15 -11
pixeltable/metadata/converters/convert_16.py +4 -5
pixeltable/metadata/converters/convert_17.py +4 -5
pixeltable/metadata/converters/convert_18.py +4 -6
pixeltable/metadata/converters/convert_19.py +6 -9
pixeltable/metadata/converters/convert_20.py +3 -6
pixeltable/metadata/converters/convert_21.py +6 -8
pixeltable/metadata/converters/convert_22.py +3 -2
pixeltable/metadata/converters/convert_23.py +33 -0
pixeltable/metadata/converters/convert_24.py +55 -0
pixeltable/metadata/converters/convert_25.py +19 -0
pixeltable/metadata/converters/convert_26.py +23 -0
pixeltable/metadata/converters/convert_27.py +29 -0
pixeltable/metadata/converters/convert_28.py +13 -0
pixeltable/metadata/converters/convert_29.py +110 -0
pixeltable/metadata/converters/convert_30.py +63 -0
pixeltable/metadata/converters/convert_31.py +11 -0
pixeltable/metadata/converters/convert_32.py +15 -0
pixeltable/metadata/converters/convert_33.py +17 -0
pixeltable/metadata/converters/convert_34.py +21 -0
pixeltable/metadata/converters/convert_35.py +9 -0
pixeltable/metadata/converters/convert_36.py +38 -0
pixeltable/metadata/converters/convert_37.py +15 -0
pixeltable/metadata/converters/convert_38.py +39 -0
pixeltable/metadata/converters/convert_39.py +124 -0
pixeltable/metadata/converters/convert_40.py +73 -0
pixeltable/metadata/converters/convert_41.py +12 -0
pixeltable/metadata/converters/convert_42.py +9 -0
pixeltable/metadata/converters/convert_43.py +44 -0
pixeltable/metadata/converters/util.py +44 -18
pixeltable/metadata/notes.py +21 -0
pixeltable/metadata/schema.py +185 -42
pixeltable/metadata/utils.py +74 -0
pixeltable/mypy/__init__.py +3 -0
pixeltable/mypy/mypy_plugin.py +123 -0
pixeltable/plan.py +616 -225
pixeltable/share/__init__.py +3 -0
pixeltable/share/packager.py +797 -0
pixeltable/share/protocol/__init__.py +33 -0
pixeltable/share/protocol/common.py +165 -0
pixeltable/share/protocol/operation_types.py +33 -0
pixeltable/share/protocol/replica.py +119 -0
pixeltable/share/publish.py +349 -0
pixeltable/store.py +398 -232
pixeltable/type_system.py +730 -267
pixeltable/utils/__init__.py +40 -0
pixeltable/utils/arrow.py +201 -29
pixeltable/utils/av.py +298 -0
pixeltable/utils/azure_store.py +346 -0
pixeltable/utils/coco.py +26 -27
pixeltable/utils/code.py +4 -4
pixeltable/utils/console_output.py +46 -0
pixeltable/utils/coroutine.py +24 -0
pixeltable/utils/dbms.py +92 -0
pixeltable/utils/description_helper.py +11 -12
pixeltable/utils/documents.py +60 -61
pixeltable/utils/exception_handler.py +36 -0
pixeltable/utils/filecache.py +38 -22
pixeltable/utils/formatter.py +88 -51
pixeltable/utils/gcs_store.py +295 -0
pixeltable/utils/http.py +133 -0
pixeltable/utils/http_server.py +14 -13
pixeltable/utils/iceberg.py +13 -0
pixeltable/utils/image.py +17 -0
pixeltable/utils/lancedb.py +90 -0
pixeltable/utils/local_store.py +322 -0
pixeltable/utils/misc.py +5 -0
pixeltable/utils/object_stores.py +573 -0
pixeltable/utils/pydantic.py +60 -0
pixeltable/utils/pytorch.py +20 -20
pixeltable/utils/s3_store.py +527 -0
pixeltable/utils/sql.py +32 -5
pixeltable/utils/system.py +30 -0
pixeltable/utils/transactional_directory.py +4 -3
pixeltable-0.5.7.dist-info/METADATA +579 -0
pixeltable-0.5.7.dist-info/RECORD +227 -0
{pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
pixeltable/__version__.py +0 -3
pixeltable/catalog/named_function.py +0 -36
pixeltable/catalog/path_dict.py +0 -141
pixeltable/dataframe.py +0 -894
pixeltable/exec/expr_eval_node.py +0 -232
pixeltable/ext/__init__.py +0 -14
pixeltable/ext/functions/__init__.py +0 -8
pixeltable/ext/functions/whisperx.py +0 -77
pixeltable/ext/functions/yolox.py +0 -157
pixeltable/tool/create_test_db_dump.py +0 -311
pixeltable/tool/create_test_video.py +0 -81
pixeltable/tool/doc_plugins/griffe.py +0 -50
pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
pixeltable/tool/embed_udf.py +0 -9
pixeltable/tool/mypy_plugin.py +0 -55
pixeltable/utils/media_store.py +0 -76
pixeltable/utils/s3.py +0 -16
pixeltable-0.2.26.dist-info/METADATA +0 -400
pixeltable-0.2.26.dist-info/RECORD +0 -156
pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
{pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0

pixeltable/iterators/video.py CHANGED

@@ -1,15 +1,23 @@
+import glob
 import logging
 import math
+import subprocess
 from fractions import Fraction
 from pathlib import Path
-from typing import Any, Optional, Sequence
+from typing import Any, Iterator, Literal
-import av  # type: ignore[import-untyped]
+import av
 import pandas as pd
 import PIL.Image
+from av.container import InputContainer
+from deprecated import deprecated
+import pixeltable as pxt
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
+import pixeltable.utils.av as av_utils
+from pixeltable.env import Env
+from pixeltable.utils.local_store import TempStore
 from .base import ComponentIterator
@@ -18,42 +26,78 @@ _logger = logging.getLogger('pixeltable')
 class FrameIterator(ComponentIterator):
     """
-    Iterator over frames of a video. At most one of `fps` or `num_frames` may be specified. If `fps` is specified,
-    then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified, then the
-    exact number of frames will be extracted. If neither is specified, then all frames will be extracted. The first
-    frame of the video will always be extracted, and the remaining frames will be spaced as evenly as possible.
+    Iterator over frames of a video. At most one of `fps`, `num_frames`, or `keyframes_only` may be specified. If `fps`
+    is specified, then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified,
+    then the exact number of frames will be extracted. If neither is specified, then all frames will be extracted.
+    If `fps` or `num_frames` is large enough to exceed the native framerate of the video, then all frames will be
+    extracted. (Frames will never be duplicated; the maximum number of frames extracted is the total number of frames
+    in the video.)
     Args:
-        video: URL or path of the video to use for frame extraction.
-        fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
-            If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
-            extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
-        num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
-            `num_frames` is greater than the number of frames in the video, all frames will be extracted.
+        fps: Number of frames to extract per second of video. This may be a fractional value, such as `0.5` (one frame
+            per two seconds). The first frame of the video will always be extracted.
+        num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible: the video will
+            be divided into `num_frames` evenly spaced intervals, and the midpoint of each interval will be used for
+            frame extraction.
+        keyframes_only: If True, only extract keyframes.
+        all_frame_attrs:
+            If True, outputs a `pxt.Json` column `frame_attrs` with the following `pyav`-provided attributes
+            (for more information, see `pyav`'s documentation on
+            [VideoFrame](https://pyav.org/docs/develop/api/video.html#module-av.video.frame) and
+            [Frame](https://pyav.org/docs/develop/api/frame.html)):
+            * `index` (`int`)
+            * `pts` (`int | None`)
+            * `dts` (`int | None`)
+            * `time` (`float | None`)
+            * `is_corrupt` (`bool`)
+            * `key_frame` (`bool`)
+            * `pict_type` (`int`)
+            * `interlaced_frame` (`bool`)
+            If False, only outputs frame attributes `frame_idx`, `pos_msec`, and `pos_frame` as separate columns.
     """
     # Input parameters
     video_path: Path
-    fps: Optional[float]
-    num_frames: Optional[int]
+    fps: float | None
+    num_frames: int | None
+    keyframes_only: bool
+    all_frame_attrs: bool
     # Video info
-    container: av.container.input.InputContainer
-    video_framerate: Fraction
+    container: InputContainer
     video_time_base: Fraction
-    video_frame_count: int
-    video_start_time: int
+    video_start_time: float
+    video_duration: float | None
+    # extraction info
+    extraction_step: float | None
+    next_extraction_time: float | None
-    # List of frame indices to be extracted, or None to extract all frames
-    frames_to_extract: Optional[list[int]]
+    # state
+    pos: int
+    video_idx: int
+    cur_frame: av.VideoFrame | None
-    # Next frame to extract, as an iterator `pos` index. If `frames_to_extract` is None, this is the same as the
-    # frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
-    next_pos: int
+    def __init__(
+        self,
+        video: str,
+        *,
+        fps: float | None = None,
+        num_frames: int | None = None,
+        keyframes_only: bool = False,
+        all_frame_attrs: bool = False,
+    ):
+        if int(fps is not None) + int(num_frames is not None) + int(keyframes_only) > 1:
+            raise excs.Error('At most one of `fps`, `num_frames` or `keyframes_only` may be specified')
-    def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
-        if fps is not None and num_frames is not None:
-            raise excs.Error('At most one of `fps` or `num_frames` may be specified')
+        if fps is not None and fps < 0.0:
+            raise excs.Error('`fps` must be a non-negative number')
+        if fps == 0.0:
+            fps = None  # treat 0.0 as unspecified
         video_path = Path(video)
         assert video_path.exists() and video_path.is_file()
@@ -61,55 +105,57 @@ class FrameIterator(ComponentIterator):
         self.container = av.open(str(video_path))
         self.fps = fps
         self.num_frames = num_frames
+        self.keyframes_only = keyframes_only
+        self.all_frame_attrs = all_frame_attrs
-        self.video_framerate = self.container.streams.video[0].average_rate
         self.video_time_base = self.container.streams.video[0].time_base
-        self.video_start_time = self.container.streams.video[0].start_time or 0
-        # Determine the number of frames in the video
-        self.video_frame_count = self.container.streams.video[0].frames
-        if self.video_frame_count == 0:
-            # The video codec does not provide a frame count in the standard `frames` field. Try some other methods.
-            metadata: dict = self.container.streams.video[0].metadata
-            if 'NUMBER_OF_FRAMES' in metadata:
-                self.video_frame_count = int(metadata['NUMBER_OF_FRAMES'])
-            elif 'DURATION' in metadata:
-                # As a last resort, calculate the frame count from the stream duration.
-                duration = metadata['DURATION']
-                assert isinstance(duration, str)
-                seconds = pd.to_timedelta(duration).total_seconds()
-                # Usually the duration and framerate are precise enough for this calculation to be accurate, but if
-                # we encounter a case where it's off by one due to a rounding error, that's ok; we only use this
-                # to determine the positions of the sampled frames when `fps` or `num_frames` is specified.
-                self.video_frame_count = round(seconds * self.video_framerate)
-            else:
-                raise excs.Error(f'Video {video}: failed to get number of frames')
-        if num_frames is not None:
-            # specific number of frames
-            if num_frames > self.video_frame_count:
-                # Extract all frames
-                self.frames_to_extract = None
-            else:
-                spacing = float(self.video_frame_count) / float(num_frames)
-                self.frames_to_extract = list(round(i * spacing) for i in range(num_frames))
-                assert len(self.frames_to_extract) == num_frames
+        start_time = self.container.streams.video[0].start_time or 0
+        self.video_start_time = float(start_time * self.video_time_base)
+        duration_pts: int | None = self.container.streams.video[0].duration
+        if duration_pts is not None:
+            self.video_duration = float(duration_pts * self.video_time_base)
         else:
-            if fps is None or fps == 0.0:
-                # Extract all frames
-                self.frames_to_extract = None
-            elif fps > float(self.video_framerate):
-                raise excs.Error(
-                    f'Video {video}: requested fps ({fps}) exceeds that of the video ({float(self.video_framerate)})'
-                )
+            # As a backup, try to calculate duration from DURATION metadata field
+            metadata = self.container.streams.video[0].metadata
+            duration_field = metadata.get('DURATION')  # A string like "00:01:23"
+            if duration_field is not None:
+                assert isinstance(duration_field, str)
+                self.video_duration = pd.to_timedelta(duration_field).total_seconds()
             else:
-                # Extract frames at the implied frequency
-                freq = fps / float(self.video_framerate)
-                n = math.ceil(self.video_frame_count * freq)  # number of frames to extract
-                self.frames_to_extract = list(round(i / freq) for i in range(n))
+                # TODO: Anything we can do here? Other methods of determining the duration are expensive and
+                #     not so appropriate for an iterator initializer.
+                self.video_duration = None
-        _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames}')
-        self.next_pos = 0
+        if self.video_duration is None and self.num_frames is not None:
+            raise excs.Error(f'Could not determine duration of video: {video}')
+        # If self.fps or self.num_frames is specified, we cannot rely on knowing in advance which frame positions will
+        # be needed, since for variable framerate videos we do not know in advance the precise timestamp of each frame.
+        # The strategy is: predetermine a list of "extraction times", the idealized timestamps of the frames we want to
+        # materialize. As we later iterate through the frames, we will choose the frames that are closest to these
+        # idealized timestamps.
+        self.pos = 0
+        self.video_idx = 0
+        if self.num_frames is not None:
+            # Divide the video duration into num_frames evenly spaced intervals. The extraction times are the midpoints
+            # of those intervals.
+            self.extraction_step = (self.video_duration - self.video_start_time) / self.num_frames
+            self.next_extraction_time = self.video_start_time + self.extraction_step / 2
+        elif self.fps is not None:
+            self.extraction_step = 1 / self.fps
+            self.next_extraction_time = self.video_start_time
+        else:
+            self.extraction_step = None
+            self.next_extraction_time = None
+        _logger.debug(
+            f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames} '
+            f'keyframes_only={self.keyframes_only}'
+        )
+        self.cur_frame = self.next_frame()
     @classmethod
     def input_schema(cls) -> dict[str, ts.ColumnType]:
@@ -117,77 +163,427 @@ class FrameIterator(ComponentIterator):
             'video': ts.VideoType(nullable=False),
             'fps': ts.FloatType(nullable=True),
             'num_frames': ts.IntType(nullable=True),
+            'keyframes_only': ts.BoolType(nullable=False),
+            'all_frame_attrs': ts.BoolType(nullable=False),
         }
     @classmethod
     def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
-        return {
-            'frame_idx': ts.IntType(),
-            'pos_msec': ts.FloatType(),
-            'pos_frame': ts.IntType(),
-            'frame': ts.ImageType(),
-        }, ['frame']
+        attrs: dict[str, ts.ColumnType]
+        fps = kwargs.get('fps')
+        if fps is not None and (not isinstance(fps, (int, float)) or fps < 0.0):
+            raise excs.Error('`fps` must be a non-negative number')
-    def __next__(self) -> dict[str, Any]:
-        # Determine the frame index in the video corresponding to the iterator index `next_pos`;
-        # the frame at this index is the one we want to extract next
-        if self.frames_to_extract is None:
-            next_video_idx = self.next_pos  # we're extracting all frames
-        elif self.next_pos >= len(self.frames_to_extract):
-            raise StopIteration
+        if kwargs.get('all_frame_attrs'):
+            attrs = {'frame_attrs': ts.JsonType()}
         else:
-            next_video_idx = self.frames_to_extract[self.next_pos]
-        # We are searching for the frame at the index implied by `next_pos`. Step through the video until we
-        # find it. There are two reasons why it might not be the immediate next frame in the video:
-        # (1) `fps` or `num_frames` was specified as an iterator argument; or
-        # (2) we just did a seek, and the desired frame is not a keyframe.
-        # TODO: In case (1) it will usually be fastest to step through the frames until we find the one we're
-        #     looking for. But in some cases it may be faster to do a seek; for example, when `fps` is very
-        #     low and there are multiple keyframes in between each frame we want to extract (imagine extracting
-        #     10 frames from an hourlong video).
+            attrs = {'frame_idx': ts.IntType(), 'pos_msec': ts.FloatType(), 'pos_frame': ts.IntType()}
+        return {**attrs, 'frame': ts.ImageType()}, ['frame']
+    def next_frame(self) -> av.VideoFrame | None:
+        try:
+            return next(self.container.decode(video=0))
+        except EOFError:
+            return None
+    def __next__(self) -> dict[str, Any]:
         while True:
-            try:
-                frame = next(self.container.decode(video=0))
-            except EOFError:
+            if self.cur_frame is None:
                 raise StopIteration
-            # Compute the index of the current frame in the video based on the presentation timestamp (pts);
-            # this ensures we have a canonical understanding of frame index, regardless of how we got here
-            # (seek or iteration)
-            pts = frame.pts - self.video_start_time
-            video_idx = round(pts * self.video_time_base * self.video_framerate)
-            assert isinstance(video_idx, int)
-            if video_idx < next_video_idx:
-                # We haven't reached the desired frame yet
+            next_frame = self.next_frame()
+            if self.keyframes_only and not self.cur_frame.key_frame:
+                self.cur_frame = next_frame
+                self.video_idx += 1
                 continue
-            # Sanity check that we're at the right frame.
-            if video_idx != next_video_idx:
-                raise excs.Error(f'Frame {next_video_idx} is missing from the video (video file is corrupt)')
-            img = frame.to_image()
+            cur_frame_pts = self.cur_frame.pts
+            cur_frame_time = float(cur_frame_pts * self.video_time_base)
+            if self.extraction_step is not None:
+                # We are targeting a specified list of extraction times (because fps or num_frames was specified).
+                assert self.next_extraction_time is not None
+                if next_frame is None:
+                    # cur_frame is the last frame of the video. If it is before the next extraction time, then we
+                    # have reached the end of the video.
+                    if cur_frame_time < self.next_extraction_time:
+                        raise StopIteration
+                else:
+                    # The extraction time represents the idealized timestamp of the next frame we want to extract.
+                    # If next_frame is *closer* to it than cur_frame, then we skip cur_frame.
+                    # The following logic handles all three cases:
+                    # - next_extraction_time is before cur_frame_time (never skips)
+                    # - next_extraction_time is after next_frame_time (always skips)
+                    # - next_extraction_time is between cur_frame_time and next_frame_time (depends on which is closer)
+                    next_frame_pts = next_frame.pts
+                    next_frame_time = float(next_frame_pts * self.video_time_base)
+                    if next_frame_time - self.next_extraction_time < self.next_extraction_time - cur_frame_time:
+                        self.cur_frame = next_frame
+                        self.video_idx += 1
+                        continue
+            img = self.cur_frame.to_image()
             assert isinstance(img, PIL.Image.Image)
-            pos_msec = float(pts * self.video_time_base * 1000)
-            result = {
-                'frame_idx': self.next_pos,
-                'pos_msec': pos_msec,
-                'pos_frame': video_idx,
-                'frame': img,
-            }
-            self.next_pos += 1
+            result: dict[str, Any] = {'frame': img}
+            if self.all_frame_attrs:
+                attrs = {
+                    'index': self.video_idx,
+                    'pts': cur_frame_pts,
+                    'dts': self.cur_frame.dts,
+                    'time': float(cur_frame_pts * self.video_time_base),
+                    'is_corrupt': self.cur_frame.is_corrupt,
+                    'key_frame': self.cur_frame.key_frame,
+                    'pict_type': self.cur_frame.pict_type,
+                    'interlaced_frame': self.cur_frame.interlaced_frame,
+                }
+                result['frame_attrs'] = attrs
+            else:
+                pos_msec = float(cur_frame_pts * self.video_time_base * 1000 - self.video_start_time)
+                result.update({'frame_idx': self.pos, 'pos_msec': pos_msec, 'pos_frame': self.video_idx})
+            self.cur_frame = next_frame
+            self.video_idx += 1
+            self.pos += 1
+            if self.extraction_step is not None:
+                self.next_extraction_time += self.extraction_step
             return result
     def close(self) -> None:
         self.container.close()
-    def set_pos(self, pos: int) -> None:
-        if pos == self.next_pos:
-            return  # already there
-        video_idx = pos if self.frames_to_extract is None else self.frames_to_extract[pos]
-        _logger.debug(f'seeking to frame number {video_idx} (at iterator index {pos})')
-        # compute the frame position in time_base units
-        seek_pos = int(video_idx / self.video_framerate / self.video_time_base + self.video_start_time)
-        # This will seek to the nearest keyframe before the desired frame. If the frame being sought is not a keyframe,
-        # then the iterator will step forward to the desired frame on the subsequent call to next().
-        self.container.seek(seek_pos, backward=True, stream=self.container.streams.video[0])
-        self.next_pos = pos
+    def set_pos(self, pos: int, **kwargs: Any) -> None:
+        assert next(iter(kwargs.values()), None) is not None
+        if self.pos == pos:
+            # Nothing to do
+            return
+        self.pos = pos
+        seek_time: float
+        if 'pos_msec' in kwargs:
+            self.video_idx = kwargs['pos_frame']
+            seek_time = kwargs['pos_msec'] / 1000.0 + self.video_start_time
+        else:
+            assert 'frame_attrs' in kwargs
+            self.video_idx = kwargs['frame_attrs']['index']
+            seek_time = kwargs['frame_attrs']['time']
+        assert isinstance(self.video_idx, int)
+        assert isinstance(seek_time, float)
+        seek_pts = math.floor(seek_time / self.video_time_base)
+        self.container.seek(seek_pts, backward=True, stream=self.container.streams.video[0])
+        self.cur_frame = self.next_frame()
+        while self.cur_frame is not None and float(self.cur_frame.pts * self.video_time_base) < seek_time - 1e-3:
+            self.cur_frame = self.next_frame()
+        assert self.cur_frame is None or abs(float(self.cur_frame.pts * self.video_time_base) - seek_time) < 1e-3
+    @classmethod
+    @deprecated('create() is deprecated; use `pixeltable.functions.video.frame_iterator` instead', version='0.5.6')
+    def create(cls, **kwargs: Any) -> tuple[type[ComponentIterator], dict[str, Any]]:
+        return super()._create(**kwargs)
+class VideoSplitter(ComponentIterator):
+    """
+    Iterator over segments of a video file, which is split into segments. The segments are specified either via a
+    fixed duration or a list of split points.
+    Args:
+        duration: Video segment duration in seconds
+        overlap: Overlap between consecutive segments in seconds. Only available for `mode='fast'`.
+        min_segment_duration: Drop the last segment if it is smaller than min_segment_duration.
+        segment_times: List of timestamps (in seconds) in video where segments should be split. Note that these are not
+            segment durations. If all segment times are less than the duration of the video, produces exactly
+            `len(segment_times) + 1` segments. An argument of `[]` will produce a single segment containing the
+            entire video.
+        mode: Segmentation mode:
+            - `'fast'`: Quick segmentation using stream copy (splits only at keyframes, approximate durations)
+            - `'accurate'`: Precise segmentation with re-encoding (exact durations, slower)
+        video_encoder: Video encoder to use. If not specified, uses the default encoder for the current platform.
+            Only available for `mode='accurate'`.
+        video_encoder_args: Additional arguments to pass to the video encoder. Only available for `mode='accurate'`.
+    """
+    # Input parameters
+    video_path: Path
+    segment_duration: float | None
+    segment_times: list[float] | None  # [] is valid
+    overlap: float
+    min_segment_duration: float
+    video_encoder: str | None
+    video_encoder_args: dict[str, Any] | None
+    # Video metadata
+    video_time_base: Fraction
+    output_iter: Iterator[dict[str, Any]]
+    def __init__(
+        self,
+        video: str,
+        *,
+        duration: float | None = None,
+        overlap: float | None = None,
+        min_segment_duration: float | None = None,
+        segment_times: list[float] | None = None,
+        mode: Literal['fast', 'accurate'] = 'accurate',
+        video_encoder: str | None = None,
+        video_encoder_args: dict[str, Any] | None = None,
+    ):
+        Env.get().require_binary('ffmpeg')
+        self._check_args(
+            duration, segment_times, overlap, min_segment_duration, mode, video_encoder, video_encoder_args
+        )
+        assert (duration is not None) != (segment_times is not None)
+        if duration is not None:
+            assert duration > 0.0
+            assert duration >= min_segment_duration
+            assert overlap is None or overlap < duration
+        video_path = Path(video)
+        assert video_path.exists() and video_path.is_file()
+        self.video_path = video_path
+        self.segment_duration = duration
+        self.overlap = overlap if overlap is not None else 0.0
+        self.min_segment_duration = min_segment_duration if min_segment_duration is not None else 0.0
+        self.segment_times = segment_times
+        self.video_encoder = video_encoder
+        self.video_encoder_args = video_encoder_args
+        if self.segment_times is not None and len(self.segment_times) == 0:
+            self.output_iter = self.complete_video_iter()
+        else:
+            self.output_iter = self.fast_iter() if mode == 'fast' else self.accurate_iter()
+        with av.open(str(video_path)) as container:
+            self.video_time_base = container.streams.video[0].time_base
+        # TODO: check types of args
+    @classmethod
+    def input_schema(cls) -> dict[str, ts.ColumnType]:
+        return {
+            'video': ts.VideoType(nullable=False),
+            'duration': ts.FloatType(nullable=True),
+            'overlap': ts.FloatType(nullable=True),
+            'min_segment_duration': ts.FloatType(nullable=True),
+            'segment_times': ts.JsonType(nullable=True),
+            'mode': ts.StringType(nullable=False),
+            'video_encoder': ts.StringType(nullable=True),
+            'video_encoder_args': ts.JsonType(nullable=True),
+        }
+    @classmethod
+    def _check_args(
+        cls,
+        segment_duration: Any,
+        segment_times: Any,
+        overlap: Any,
+        min_segment_duration: Any,
+        mode: Any,
+        video_encoder: Any,
+        video_encoder_args: Any,
+    ) -> None:
+        if segment_duration is None and segment_times is None:
+            raise excs.Error('Must specify either duration or segment_times')
+        if segment_duration is not None and segment_times is not None:
+            raise excs.Error('duration and segment_times cannot both be specified')
+        if segment_times is not None and overlap is not None:
+            raise excs.Error('overlap cannot be specified with segment_times')
+        if segment_duration is not None and isinstance(segment_duration, (int, float)):
+            if segment_duration <= 0.0:
+                raise excs.Error(f'duration must be a positive number: {segment_duration}')
+            if (
+                min_segment_duration is not None
+                and isinstance(min_segment_duration, (int, float))
+                and segment_duration < min_segment_duration
+            ):
+                raise excs.Error(
+                    f'duration must be at least min_segment_duration: {segment_duration} < {min_segment_duration}'
+                )
+            if overlap is not None and isinstance(overlap, (int, float)) and overlap >= segment_duration:
+                raise excs.Error(f'overlap must be less than duration: {overlap} >= {segment_duration}')
+        if mode == 'accurate' and overlap is not None:
+            raise excs.Error("Cannot specify overlap for mode='accurate'")
+        if mode == 'fast':
+            if video_encoder is not None:
+                raise excs.Error("Cannot specify video_encoder for mode='fast'")
+            if video_encoder_args is not None:
+                raise excs.Error("Cannot specify video_encoder_args for mode='fast'")
+    @classmethod
+    def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
+        param_names = ['duration', 'overlap', 'min_segment_duration', 'segment_times']
+        params = dict(zip(param_names, args))
+        params.update(kwargs)
+        segment_duration = params.get('duration')
+        segment_times = params.get('segment_times')
+        overlap = params.get('overlap')
+        min_segment_duration = params.get('min_segment_duration')
+        mode = params.get('mode', 'accurate')
+        video_encoder = params.get('video_encoder')
+        video_encoder_args = params.get('video_encoder_args')
+        cls._check_args(
+            segment_duration, segment_times, overlap, min_segment_duration, mode, video_encoder, video_encoder_args
+        )
+        return {
+            'segment_start': ts.FloatType(nullable=True),
+            'segment_start_pts': ts.IntType(nullable=True),
+            'segment_end': ts.FloatType(nullable=True),
+            'segment_end_pts': ts.IntType(nullable=True),
+            'video_segment': ts.VideoType(nullable=False),
+        }, []
+    def complete_video_iter(self) -> Iterator[dict[str, Any]]:
+        """Returns the entire video as a single segment"""
+        assert len(self.segment_times) == 0
+        with av.open(str(self.video_path)) as container:
+            video_stream = container.streams.video[0]
+            start_ts = (
+                float(video_stream.start_time * video_stream.time_base)
+                if video_stream.start_time is not None and video_stream.time_base is not None
+                else 0.0
+            )
+            end_pts = (
+                video_stream.start_time + video_stream.duration
+                if video_stream.start_time is not None and video_stream.duration is not None
+                else None
+            )
+            end_ts = (
+                float(end_pts * video_stream.time_base)
+                if end_pts is not None and video_stream.time_base is not None
+                else 0.0
+            )
+            result = {
+                'segment_start': start_ts,
+                'segment_start_pts': video_stream.start_time,
+                'segment_end': end_ts,
+                'segment_end_pts': end_pts,
+                'video_segment': str(self.video_path),
+            }
+            yield result
+    def fast_iter(self) -> Iterator[dict[str, Any]]:
+        segment_path: str = ''
+        assert self.segment_times is None or len(self.segment_times) > 0
+        try:
+            start_time = 0.0
+            start_pts = 0
+            segment_idx = 0
+            while True:
+                target_duration: float | None
+                if self.segment_duration is not None:
+                    target_duration = self.segment_duration
+                elif self.segment_times is not None and segment_idx < len(self.segment_times):
+                    target_duration = self.segment_times[segment_idx] - start_time
+                else:
+                    target_duration = None  # the rest of the video
+                segment_path = str(TempStore.create_path(extension='.mp4'))
+                cmd = av_utils.ffmpeg_clip_cmd(str(self.video_path), segment_path, start_time, target_duration)
+                _ = subprocess.run(cmd, capture_output=True, text=True, check=True)
+                # use the actual duration
+                segment_duration = av_utils.get_video_duration(segment_path)
+                if segment_duration - self.overlap == 0.0 or segment_duration < self.min_segment_duration:
+                    # we're done
+                    Path(segment_path).unlink()
+                    return
+                segment_end = start_time + segment_duration
+                segment_end_pts = start_pts + round(segment_duration / self.video_time_base)
+                result = {
+                    'segment_start': start_time,
+                    'segment_start_pts': start_pts,
+                    'segment_end': segment_end,
+                    'segment_end_pts': segment_end_pts,
+                    'video_segment': segment_path,
+                }
+                yield result
+                start_time = segment_end - self.overlap
+                start_pts = segment_end_pts - round(self.overlap / self.video_time_base)
+                segment_idx += 1
+                if self.segment_times is not None and segment_idx > len(self.segment_times):
+                    # We've created all segments including the final segment after the last segment_time
+                    break
+        except subprocess.CalledProcessError as e:
+            if segment_path and Path(segment_path).exists():
+                Path(segment_path).unlink()
+            error_msg = f'ffmpeg failed with return code {e.returncode}'
+            if e.stderr:
+                error_msg += f': {e.stderr.strip()}'
+            raise pxt.Error(error_msg) from e
+    def accurate_iter(self) -> Iterator[dict[str, Any]]:
+        assert self.segment_times is None or len(self.segment_times) > 0
+        base_path = TempStore.create_path(extension='')
+        # Use ffmpeg -f segment for accurate segmentation with re-encoding
+        output_pattern = f'{base_path}_segment_%04d.mp4'
+        cmd = av_utils.ffmpeg_segment_cmd(
+            str(self.video_path),
+            output_pattern,
+            segment_duration=self.segment_duration,
+            segment_times=self.segment_times,
+            video_encoder=self.video_encoder,
+            video_encoder_args=self.video_encoder_args,
+        )
+        try:
+            _ = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            output_paths = sorted(glob.glob(f'{base_path}_segment_*.mp4'))
+            # TODO: is this actually an error?
+            # if len(output_paths) == 0:
+            #     stderr_output = result.stderr.strip() if result.stderr is not None else ''
+            #     raise pxt.Error(
+            #         f'ffmpeg failed to create output files for commandline: {" ".join(cmd)}\n{stderr_output}'
+            #     )
+            start_time = 0.0
+            start_pts = 0
+            for segment_path in output_paths:
+                segment_duration = av_utils.get_video_duration(segment_path)
+                if segment_duration < self.min_segment_duration:
+                    Path(segment_path).unlink()
+                    return
+                result = {
+                    'segment_start': start_time,
+                    'segment_start_pts': start_pts,
+                    'segment_end': start_time + segment_duration,
+                    'segment_end_pts': start_pts + round(segment_duration / self.video_time_base),
+                    'video_segment': segment_path,
+                }
+                yield result
+                start_time += segment_duration
+                start_pts += round(segment_duration / self.video_time_base)
+        except subprocess.CalledProcessError as e:
+            error_msg = f'ffmpeg failed with return code {e.returncode}'
+            if e.stderr:
+                error_msg += f': {e.stderr.strip()}'
+            raise pxt.Error(error_msg) from e
+    def __next__(self) -> dict[str, Any]:
+        return next(self.output_iter)
+    def close(self) -> None:
+        pass
+    @classmethod
+    @deprecated('create() is deprecated; use `pixeltable.functions.video.video_splitter` instead', version='0.5.6')
+    def create(cls, **kwargs: Any) -> tuple[type[ComponentIterator], dict[str, Any]]:
+        return super()._create(**kwargs)

pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl