pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/functions/video.py
CHANGED
@@ -1,75 +1,74 @@
 """
-Pixeltable
-
-Example:
-```python
-import pixeltable as pxt
-from pixeltable.functions import video as pxt_video
-
-t = pxt.get_table(...)
-t.select(pxt_video.extract_audio(t.video_col)).collect()
-```
+Pixeltable UDFs for `VideoType`.
 """
 
-import
-import
-
-
+import glob
+import logging
+import pathlib
+import subprocess
+from typing import TYPE_CHECKING, Any, Literal, NamedTuple, NoReturn
 
-import av
+import av
+import av.container
 import numpy as np
 import PIL.Image
 
 import pixeltable as pxt
-import pixeltable.
+import pixeltable.utils.av as av_utils
+from pixeltable.env import Env
 from pixeltable.utils.code import local_public_names
+from pixeltable.utils.local_store import TempStore
+
+if TYPE_CHECKING:
+    from scenedetect.detectors import SceneDetector  # type: ignore[import-untyped]
+
+_logger = logging.getLogger('pixeltable')
+
 
-
-    'wav': ('pcm_s16le', 'wav'),
-    'mp3': ('libmp3lame', 'mp3'),
-    'flac': ('flac', 'flac'),
-    #'mp4': ('aac', 'm4a'),
-}
-
-# for mp4:
-# - extract_audio() fails with "Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1146 >= 290"
-# - chatgpt suggests this can be fixed in the following manner
-# for packet in container.demux(audio_stream):
-#     packet.pts = None  # Reset the PTS and DTS to allow FFmpeg to set them automatically
-#     packet.dts = None
-#     for frame in packet.decode():
-#         frame.pts = None
-#         for packet in output_stream.encode(frame):
-#             output_container.mux(packet)
-#
-# # Flush remaining packets
-# for packet in output_stream.encode():
-#     output_container.mux(packet)
-
-
-@pxt.uda(
-    init_types=[pxt.IntType()],
-    update_types=[pxt.ImageType()],
-    value_type=pxt.VideoType(),
-    requires_order_by=True,
-    allows_window=False,
-)
+@pxt.uda(requires_order_by=True)
 class make_video(pxt.Aggregator):
     """
-
+    Aggregate function that creates a video from a sequence of images, using the default video encoder and
+    yuv420p pixel format.
+
+    Args:
+        fps: Frames per second for the output video.
+
+    Returns:
+        The video obtained by combining the input frames at the specified `fps`.
+
+    Examples:
+        Combine the images in the `img` column of the table `tbl` into a video:
+
+        >>> tbl.select(make_video(tbl.img, fps=30)).collect()
+
+        Combine a sequence of rotated images into a video:
+
+        >>> tbl.select(make_video(tbl.img.rotate(45), fps=30)).collect()
+
+        For a more extensive example, see the
+        [Object Detection in Videos](https://docs.pixeltable.com/howto/cookbooks/video/object-detection-in-videos)
+        cookbook.
     """
+
+    # Based on: https://pyav.org/docs/develop/cookbook/numpy.html#generating-video
+
+    # TODO: provide parameters for video_encoder and pix_fmt
+
+    container: av.container.OutputContainer | None
+    stream: av.VideoStream | None
+    fps: int
+
     def __init__(self, fps: int = 25):
-
-        self.
-        self.stream: Optional[av.stream.Stream] = None
+        self.container = None
+        self.stream = None
         self.fps = fps
 
     def update(self, frame: PIL.Image.Image) -> None:
         if frame is None:
             return
         if self.container is None:
-
-            self.out_file = Path(output_filename)
+            self.out_file = TempStore.create_path(extension='.mp4')
             self.container = av.open(str(self.out_file), mode='w')
             self.stream = self.container.add_stream('h264', rate=self.fps)
             self.stream.pix_fmt = 'yuv420p'
@@ -80,7 +79,7 @@ class make_video(pxt.Aggregator):
         for packet in self.stream.encode(av_frame):
             self.container.mux(packet)
 
-    def value(self) ->
+    def value(self) -> pxt.Video:
         for packet in self.stream.encode():
             self.container.mux(packet)
         self.container.close()
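The rewritten `make_video` aggregator follows the PyAV muxing recipe it cites: open an output container, add an h264 stream with `pix_fmt='yuv420p'`, encode each frame, flush, close. Below is a minimal standalone sketch of that same pattern; the output filename and the synthetic frames are illustrative and not part of the diff:

```python
# Sketch of the PyAV pattern make_video is based on; see
# https://pyav.org/docs/develop/cookbook/numpy.html#generating-video
import av
import numpy as np
import PIL.Image

container = av.open('frames.mp4', mode='w')
stream = container.add_stream('h264', rate=25)  # 25 fps, matching make_video's default
stream.width, stream.height = 320, 240
stream.pix_fmt = 'yuv420p'

for i in range(50):
    # make_video receives PIL images from the ordered column; here we synthesize them
    img = PIL.Image.fromarray(np.full((240, 320, 3), i * 5, dtype=np.uint8))
    for packet in stream.encode(av.VideoFrame.from_image(img)):
        container.mux(packet)

for packet in stream.encode():  # flush buffered packets
    container.mux(packet)
container.close()
```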
@@ -89,103 +88,1515 @@ class make_video(pxt.Aggregator):
|
|
|
89
88
|
|
|
90
89
|
@pxt.udf(is_method=True)
|
|
91
90
|
def extract_audio(
|
|
92
|
-
video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec:
|
|
91
|
+
video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec: str | None = None
|
|
93
92
|
) -> pxt.Audio:
|
|
94
93
|
"""
|
|
95
|
-
Extract an audio stream from a video
|
|
94
|
+
Extract an audio stream from a video.
|
|
96
95
|
|
|
97
96
|
Args:
|
|
98
97
|
stream_idx: Index of the audio stream to extract.
|
|
99
98
|
format: The target audio format. (`'wav'`, `'mp3'`, `'flac'`).
|
|
100
99
|
codec: The codec to use for the audio stream. If not provided, a default codec will be used.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
The extracted audio.
|
|
103
|
+
|
|
104
|
+
Examples:
|
|
105
|
+
Add a computed column to a table `tbl` that extracts audio from an existing column `video_col`:
|
|
106
|
+
|
|
107
|
+
>>> tbl.add_computed_column(
|
|
108
|
+
... extracted_audio=tbl.video_col.extract_audio(format='flac')
|
|
109
|
+
... )
|
|
101
110
|
"""
|
|
102
|
-
if format not in
|
|
111
|
+
if format not in av_utils.AUDIO_FORMATS:
|
|
103
112
|
raise ValueError(f'extract_audio(): unsupported audio format: {format}')
|
|
104
|
-
default_codec, ext =
|
|
113
|
+
default_codec, ext = av_utils.AUDIO_FORMATS[format]
|
|
105
114
|
|
|
106
115
|
with av.open(video_path) as container:
|
|
107
116
|
if len(container.streams.audio) <= stream_idx:
|
|
108
117
|
return None
|
|
109
118
|
audio_stream = container.streams.audio[stream_idx]
|
|
110
119
|
# create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
|
|
111
|
-
|
|
120
|
+
output_path = str(TempStore.create_path(extension=f'.{ext}'))
|
|
112
121
|
|
|
113
|
-
with av.open(
|
|
122
|
+
with av.open(output_path, 'w', format=format) as output_container:
|
|
114
123
|
output_stream = output_container.add_stream(codec or default_codec)
|
|
124
|
+
assert isinstance(output_stream, av.audio.stream.AudioStream)
|
|
115
125
|
for packet in container.demux(audio_stream):
|
|
116
126
|
for frame in packet.decode():
|
|
117
|
-
output_container.mux(output_stream.encode(frame))
|
|
127
|
+
output_container.mux(output_stream.encode(frame)) # type: ignore[arg-type]
|
|
118
128
|
|
|
119
|
-
return
|
|
129
|
+
return output_path
|
|
120
130
|
|
|
121
131
|
|
|
122
132
|
@pxt.udf(is_method=True)
|
|
123
133
|
def get_metadata(video: pxt.Video) -> dict:
|
|
124
134
|
"""
|
|
125
135
|
Gets various metadata associated with a video file and returns it as a dictionary.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
video: The video for which to get metadata.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
A `dict` such as the following:
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
{
|
|
145
|
+
'bit_exact': False,
|
|
146
|
+
'bit_rate': 967260,
|
|
147
|
+
'size': 2234371,
|
|
148
|
+
'metadata': {
|
|
149
|
+
'encoder': 'Lavf60.16.100',
|
|
150
|
+
'major_brand': 'isom',
|
|
151
|
+
'minor_version': '512',
|
|
152
|
+
'compatible_brands': 'isomiso2avc1mp41',
|
|
153
|
+
},
|
|
154
|
+
'streams': [
|
|
155
|
+
{
|
|
156
|
+
'type': 'video',
|
|
157
|
+
'width': 640,
|
|
158
|
+
'height': 360,
|
|
159
|
+
'frames': 462,
|
|
160
|
+
'time_base': 1.0 / 12800,
|
|
161
|
+
'duration': 236544,
|
|
162
|
+
'duration_seconds': 236544.0 / 12800,
|
|
163
|
+
'average_rate': 25.0,
|
|
164
|
+
'base_rate': 25.0,
|
|
165
|
+
'guessed_rate': 25.0,
|
|
166
|
+
'metadata': {
|
|
167
|
+
'language': 'und',
|
|
168
|
+
'handler_name': 'L-SMASH Video Handler',
|
|
169
|
+
'vendor_id': '[0][0][0][0]',
|
|
170
|
+
'encoder': 'Lavc60.31.102 libx264',
|
|
171
|
+
},
|
|
172
|
+
'codec_context': {'name': 'h264', 'codec_tag': 'avc1', 'profile': 'High', 'pix_fmt': 'yuv420p'},
|
|
173
|
+
}
|
|
174
|
+
],
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Examples:
|
|
179
|
+
Extract metadata for files in the `video_col` column of the table `tbl`:
|
|
180
|
+
|
|
181
|
+
>>> tbl.select(tbl.video_col.get_metadata()).collect()
|
|
182
|
+
"""
|
|
183
|
+
return av_utils.get_metadata(video)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@pxt.udf(is_method=True)
|
|
187
|
+
def get_duration(video: pxt.Video) -> float | None:
|
|
188
|
+
"""
|
|
189
|
+
Get video duration in seconds.
|
|
190
|
+
|
|
191
|
+
Args:
|
|
192
|
+
video: The video for which to get the duration.
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
The duration in seconds, or None if the duration cannot be determined.
|
|
196
|
+
"""
|
|
197
|
+
return av_utils.get_video_duration(video)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@pxt.udf(is_method=True)
|
|
201
|
+
def extract_frame(video: pxt.Video, *, timestamp: float) -> PIL.Image.Image | None:
|
|
202
|
+
"""
|
|
203
|
+
Extract a single frame from a video at a specific timestamp.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
video: The video from which to extract the frame.
|
|
207
|
+
timestamp: Extract frame at this timestamp (in seconds).
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
The extracted frame as a PIL Image, or None if the timestamp is beyond the video duration.
|
|
211
|
+
|
|
212
|
+
Examples:
|
|
213
|
+
Extract the first frame from each video in the `video` column of the table `tbl`:
|
|
214
|
+
|
|
215
|
+
>>> tbl.select(tbl.video.extract_frame(0.0)).collect()
|
|
216
|
+
|
|
217
|
+
Extract a frame close to the end of each video in the `video` column of the table `tbl`:
|
|
218
|
+
|
|
219
|
+
>>> tbl.select(
|
|
220
|
+
... tbl.video.extract_frame(
|
|
221
|
+
... tbl.video.get_metadata().streams[0].duration_seconds - 0.1
|
|
222
|
+
... )
|
|
223
|
+
... ).collect()
|
|
224
|
+
"""
|
|
225
|
+
if timestamp < 0:
|
|
226
|
+
raise ValueError("'timestamp' must be non-negative")
|
|
227
|
+
|
|
228
|
+
try:
|
|
229
|
+
with av.open(str(video)) as container:
|
|
230
|
+
video_stream = container.streams.video[0]
|
|
231
|
+
time_base = float(video_stream.time_base)
|
|
232
|
+
start_time = video_stream.start_time or 0
|
|
233
|
+
duration = video_stream.duration
|
|
234
|
+
|
|
235
|
+
# Check if timestamp is beyond video duration
|
|
236
|
+
if duration is not None:
|
|
237
|
+
duration_seconds = float(duration * time_base)
|
|
238
|
+
if timestamp > duration_seconds:
|
|
239
|
+
return None
|
|
240
|
+
|
|
241
|
+
# Convert timestamp to stream time base units
|
|
242
|
+
target_pts = int(timestamp / time_base) + start_time
|
|
243
|
+
|
|
244
|
+
# Seek to the nearest keyframe *before* our target timestamp
|
|
245
|
+
container.seek(target_pts, backward=True, stream=video_stream)
|
|
246
|
+
|
|
247
|
+
# Decode frames until we reach or pass the target timestamp
|
|
248
|
+
for frame in container.decode(video=0):
|
|
249
|
+
frame_pts = frame.pts
|
|
250
|
+
if frame_pts is None:
|
|
251
|
+
continue
|
|
252
|
+
frame_timestamp = (frame_pts - start_time) * time_base
|
|
253
|
+
if frame_timestamp >= timestamp:
|
|
254
|
+
return frame.to_image()
|
|
255
|
+
|
|
256
|
+
return None
|
|
257
|
+
|
|
258
|
+
except Exception as e:
|
|
259
|
+
raise pxt.Error(f'extract_frame(): failed to extract frame: {e}') from e
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _handle_ffmpeg_error(e: subprocess.CalledProcessError) -> NoReturn:
|
|
263
|
+
error_msg = f'ffmpeg failed with return code {e.returncode}'
|
|
264
|
+
if e.stderr is not None:
|
|
265
|
+
error_msg += f':\n{e.stderr.strip()}'
|
|
266
|
+
raise pxt.Error(error_msg) from e
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
@pxt.udf(is_method=True)
|
|
270
|
+
def clip(
|
|
271
|
+
video: pxt.Video,
|
|
272
|
+
*,
|
|
273
|
+
start_time: float,
|
|
274
|
+
end_time: float | None = None,
|
|
275
|
+
duration: float | None = None,
|
|
276
|
+
mode: Literal['fast', 'accurate'] = 'accurate',
|
|
277
|
+
video_encoder: str | None = None,
|
|
278
|
+
video_encoder_args: dict[str, Any] | None = None,
|
|
279
|
+
) -> pxt.Video | None:
|
|
280
|
+
"""
|
|
281
|
+
Extract a clip from a video, specified by `start_time` and either `end_time` or `duration` (in seconds).
|
|
282
|
+
|
|
283
|
+
If `start_time` is beyond the end of the video, returns None. Can only specify one of `end_time` and `duration`.
|
|
284
|
+
If both `end_time` and `duration` are None, the clip goes to the end of the video.
|
|
285
|
+
|
|
286
|
+
__Requirements:__
|
|
287
|
+
|
|
288
|
+
- `ffmpeg` needs to be installed and in PATH
|
|
289
|
+
|
|
290
|
+
Args:
|
|
291
|
+
video: Input video file
|
|
292
|
+
start_time: Start time in seconds
|
|
293
|
+
end_time: End time in seconds
|
|
294
|
+
duration: Duration of the clip in seconds
|
|
295
|
+
mode:
|
|
296
|
+
|
|
297
|
+
- `'fast'`: avoids re-encoding but starts the clip at the nearest keyframes and as a result, the clip
|
|
298
|
+
duration will be slightly longer than requested
|
|
299
|
+
- `'accurate'`: extracts a frame-accurate clip, but requires re-encoding
|
|
300
|
+
video_encoder: Video encoder to use. If not specified, uses the default encoder for the current platform.
|
|
301
|
+
Only available for `mode='accurate'`.
|
|
302
|
+
video_encoder_args: Additional arguments to pass to the video encoder. Only available for `mode='accurate'`.
|
|
303
|
+
|
|
304
|
+
Returns:
|
|
305
|
+
New video containing only the specified time range or None if start_time is beyond the end of the video.
|
|
306
|
+
"""
|
|
307
|
+
Env.get().require_binary('ffmpeg')
|
|
308
|
+
if start_time < 0:
|
|
309
|
+
raise pxt.Error(f'start_time must be non-negative, got {start_time}')
|
|
310
|
+
if end_time is not None and end_time <= start_time:
|
|
311
|
+
raise pxt.Error(f'end_time ({end_time}) must be greater than start_time ({start_time})')
|
|
312
|
+
if duration is not None and duration <= 0:
|
|
313
|
+
raise pxt.Error(f'duration must be positive, got {duration}')
|
|
314
|
+
if end_time is not None and duration is not None:
|
|
315
|
+
raise pxt.Error('end_time and duration cannot both be specified')
|
|
316
|
+
if mode == 'fast':
|
|
317
|
+
if video_encoder is not None:
|
|
318
|
+
raise pxt.Error("video_encoder is not supported for mode='fast'")
|
|
319
|
+
if video_encoder_args is not None:
|
|
320
|
+
raise pxt.Error("video_encoder_args is not supported for mode='fast'")
|
|
321
|
+
|
|
322
|
+
video_duration = av_utils.get_video_duration(video)
|
|
323
|
+
if video_duration is not None and start_time > video_duration:
|
|
324
|
+
return None
|
|
325
|
+
|
|
326
|
+
output_path = str(TempStore.create_path(extension='.mp4'))
|
|
327
|
+
|
|
328
|
+
if end_time is not None:
|
|
329
|
+
duration = end_time - start_time
|
|
330
|
+
cmd = av_utils.ffmpeg_clip_cmd(
|
|
331
|
+
str(video),
|
|
332
|
+
output_path,
|
|
333
|
+
start_time,
|
|
334
|
+
duration,
|
|
335
|
+
fast=(mode == 'fast'),
|
|
336
|
+
video_encoder=video_encoder,
|
|
337
|
+
video_encoder_args=video_encoder_args,
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
try:
|
|
341
|
+
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
342
|
+
output_file = pathlib.Path(output_path)
|
|
343
|
+
if not output_file.exists() or output_file.stat().st_size == 0:
|
|
344
|
+
stderr_output = result.stderr.strip() if result.stderr is not None else ''
|
|
345
|
+
raise pxt.Error(f'ffmpeg failed to create output file for commandline: {" ".join(cmd)}\n{stderr_output}')
|
|
346
|
+
return output_path
|
|
347
|
+
except subprocess.CalledProcessError as e:
|
|
348
|
+
_handle_ffmpeg_error(e)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
@pxt.udf(is_method=True)
|
|
352
|
+
def segment_video(
|
|
353
|
+
video: pxt.Video,
|
|
354
|
+
*,
|
|
355
|
+
duration: float | None = None,
|
|
356
|
+
segment_times: list[float] | None = None,
|
|
357
|
+
mode: Literal['fast', 'accurate'] = 'accurate',
|
|
358
|
+
video_encoder: str | None = None,
|
|
359
|
+
video_encoder_args: dict[str, Any] | None = None,
|
|
360
|
+
) -> list[str]:
|
|
361
|
+
"""
|
|
362
|
+
Split a video into segments.
|
|
363
|
+
|
|
364
|
+
__Requirements:__
|
|
365
|
+
|
|
366
|
+
- `ffmpeg` needs to be installed and in PATH
|
|
367
|
+
|
|
368
|
+
Args:
|
|
369
|
+
video: Input video file to segment
|
|
370
|
+
duration: Duration of each segment (in seconds). For `mode='fast'`, this is approximate;
|
|
371
|
+
for `mode='accurate'`, segments will have exact durations. Cannot be specified together with
|
|
372
|
+
`segment_times`.
|
|
373
|
+
segment_times: List of timestamps (in seconds) in video where segments should be split. Note that these are not
|
|
374
|
+
segment durations. If all segment times are less than the duration of the video, produces exactly
|
|
375
|
+
`len(segment_times) + 1` segments. Cannot be empty or be specified together with `duration`.
|
|
376
|
+
mode: Segmentation mode:
|
|
377
|
+
|
|
378
|
+
- `'fast'`: Quick segmentation using stream copy (splits only at keyframes, approximate durations)
|
|
379
|
+
- `'accurate'`: Precise segmentation with re-encoding (exact durations, slower)
|
|
380
|
+
video_encoder: Video encoder to use. If not specified, uses the default encoder for the current platform.
|
|
381
|
+
Only available for `mode='accurate'`.
|
|
382
|
+
video_encoder_args: Additional arguments to pass to the video encoder. Only available for `mode='accurate'`.
|
|
383
|
+
|
|
384
|
+
Returns:
|
|
385
|
+
List of file paths for the generated video segments.
|
|
386
|
+
|
|
387
|
+
Raises:
|
|
388
|
+
pxt.Error: If the video is missing timing information.
|
|
389
|
+
|
|
390
|
+
Examples:
|
|
391
|
+
Split a video at 1 minute intervals using fast mode:
|
|
392
|
+
|
|
393
|
+
>>> tbl.select
|
|
394
|
+
... segment_paths=tbl.video.segment_video(
|
|
395
|
+
... duration=60, mode='fast'
|
|
396
|
+
... )
|
|
397
|
+
... ).collect()
|
|
398
|
+
|
|
399
|
+
Split video into exact 10-second segments with default accurate mode, using the libx264 encoder with a CRF of 23
|
|
400
|
+
and slow preset (for smaller output files):
|
|
401
|
+
|
|
402
|
+
>>> tbl.select(
|
|
403
|
+
... segment_paths=tbl.video.segment_video(
|
|
404
|
+
... duration=10,
|
|
405
|
+
... video_encoder='libx264',
|
|
406
|
+
... video_encoder_args={'crf': 23, 'preset': 'slow'}
|
|
407
|
+
... )
|
|
408
|
+
... ).collect()
|
|
409
|
+
|
|
410
|
+
Split video into two parts at the midpoint:
|
|
411
|
+
|
|
412
|
+
>>> duration = tbl.video.get_duration()
|
|
413
|
+
>>> tbl.select(
|
|
414
|
+
... segment_paths=tbl.video.segment_video(
|
|
415
|
+
... segment_times=[duration / 2]
|
|
416
|
+
... )
|
|
417
|
+
... ).collect()
|
|
126
418
|
"""
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
'
|
|
137
|
-
|
|
138
|
-
'
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
419
|
+
Env.get().require_binary('ffmpeg')
|
|
420
|
+
if duration is not None and segment_times is not None:
|
|
421
|
+
raise pxt.Error('duration and segment_times cannot both be specified')
|
|
422
|
+
if duration is not None and duration <= 0:
|
|
423
|
+
raise pxt.Error(f'duration must be positive, got {duration}')
|
|
424
|
+
if segment_times is not None and len(segment_times) == 0:
|
|
425
|
+
raise pxt.Error('segment_times cannot be empty')
|
|
426
|
+
if mode == 'fast':
|
|
427
|
+
if video_encoder is not None:
|
|
428
|
+
raise pxt.Error("video_encoder is not supported for mode='fast'")
|
|
429
|
+
if video_encoder_args is not None:
|
|
430
|
+
raise pxt.Error("video_encoder_args is not supported for mode='fast'")
|
|
431
|
+
|
|
432
|
+
base_path = TempStore.create_path(extension='')
|
|
433
|
+
|
|
434
|
+
output_paths: list[str] = []
|
|
435
|
+
if mode == 'accurate':
|
|
436
|
+
# Use ffmpeg -f segment for accurate segmentation with re-encoding
|
|
437
|
+
output_pattern = f'{base_path}_segment_%04d.mp4'
|
|
438
|
+
cmd = av_utils.ffmpeg_segment_cmd(
|
|
439
|
+
str(video),
|
|
440
|
+
output_pattern,
|
|
441
|
+
segment_duration=duration,
|
|
442
|
+
segment_times=segment_times,
|
|
443
|
+
video_encoder=video_encoder,
|
|
444
|
+
video_encoder_args=video_encoder_args,
|
|
445
|
+
)
|
|
446
|
+
|
|
447
|
+
try:
|
|
448
|
+
_ = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
449
|
+
output_paths = sorted(glob.glob(f'{base_path}_segment_*.mp4'))
|
|
450
|
+
# TODO: is this actually an error?
|
|
451
|
+
# if len(output_paths) == 0:
|
|
452
|
+
# stderr_output = result.stderr.strip() if result.stderr is not None else ''
|
|
453
|
+
# raise pxt.Error(
|
|
454
|
+
# f'ffmpeg failed to create output files for commandline: {" ".join(cmd)}\n{stderr_output}'
|
|
455
|
+
# )
|
|
456
|
+
return output_paths
|
|
457
|
+
|
|
458
|
+
except subprocess.CalledProcessError as e:
|
|
459
|
+
_handle_ffmpeg_error(e)
|
|
460
|
+
|
|
169
461
|
else:
|
|
170
|
-
|
|
171
|
-
#
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
462
|
+
# Fast mode: extract consecutive clips using stream copy (no re-encoding)
|
|
463
|
+
# This is faster but can only split at keyframes, leading to approximate durations
|
|
464
|
+
start_time = 0.0
|
|
465
|
+
segment_idx = 0
|
|
466
|
+
try:
|
|
467
|
+
while True:
|
|
468
|
+
target_duration: float | None
|
|
469
|
+
if duration is not None:
|
|
470
|
+
target_duration = duration
|
|
471
|
+
elif segment_idx < len(segment_times):
|
|
472
|
+
target_duration = segment_times[segment_idx] - start_time
|
|
473
|
+
else:
|
|
474
|
+
target_duration = None # the rest
|
|
475
|
+
segment_path = f'{base_path}_segment_{len(output_paths)}.mp4'
|
|
476
|
+
cmd = av_utils.ffmpeg_clip_cmd(str(video), segment_path, start_time, target_duration)
|
|
477
|
+
|
|
478
|
+
_ = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
479
|
+
segment_duration = av_utils.get_video_duration(segment_path)
|
|
480
|
+
if segment_duration == 0.0:
|
|
481
|
+
# we're done
|
|
482
|
+
pathlib.Path(segment_path).unlink()
|
|
483
|
+
return output_paths
|
|
484
|
+
output_paths.append(segment_path)
|
|
485
|
+
start_time += segment_duration # use the actual segment duration here, it won't match duration exactly
|
|
486
|
+
|
|
487
|
+
segment_idx += 1
|
|
488
|
+
if segment_times is not None and segment_idx > len(segment_times):
|
|
489
|
+
break
|
|
490
|
+
|
|
491
|
+
return output_paths
|
|
492
|
+
|
|
493
|
+
except subprocess.CalledProcessError as e:
|
|
494
|
+
# clean up partial results
|
|
495
|
+
for segment_path in output_paths:
|
|
496
|
+
pathlib.Path(segment_path).unlink()
|
|
497
|
+
_handle_ffmpeg_error(e)
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
@pxt.udf(is_method=True)
|
|
501
|
+
def concat_videos(videos: list[pxt.Video]) -> pxt.Video:
|
|
502
|
+
"""
|
|
503
|
+
Merge multiple videos into a single video.
|
|
504
|
+
|
|
505
|
+
__Requirements:__
|
|
506
|
+
|
|
507
|
+
- `ffmpeg` needs to be installed and in PATH
|
|
508
|
+
|
|
509
|
+
Args:
|
|
510
|
+
videos: List of videos to merge.
|
|
511
|
+
|
|
512
|
+
Returns:
|
|
513
|
+
A new video containing the merged videos.
|
|
514
|
+
"""
|
|
515
|
+
Env.get().require_binary('ffmpeg')
|
|
516
|
+
if len(videos) == 0:
|
|
517
|
+
raise pxt.Error('concat_videos(): empty argument list')
|
|
518
|
+
|
|
519
|
+
# Check that all videos have the same resolution
|
|
520
|
+
resolutions: list[tuple[int, int]] = []
|
|
521
|
+
for video in videos:
|
|
522
|
+
metadata = av_utils.get_metadata(str(video))
|
|
523
|
+
video_stream = next((stream for stream in metadata['streams'] if stream['type'] == 'video'), None)
|
|
524
|
+
if video_stream is None:
|
|
525
|
+
raise pxt.Error(f'concat_videos(): file {video!r} has no video stream')
|
|
526
|
+
resolutions.append((video_stream['width'], video_stream['height']))
|
|
527
|
+
|
|
528
|
+
# check for divergence
|
|
529
|
+
x0, y0 = resolutions[0]
|
|
530
|
+
for i, (x, y) in enumerate(resolutions[1:], start=1):
|
|
531
|
+
if (x0, y0) != (x, y):
|
|
532
|
+
raise pxt.Error(
|
|
533
|
+
f'concat_videos(): requires that all videos have the same resolution, but:'
|
|
534
|
+
f'\n video 0 ({videos[0]!r}): {x0}x{y0}'
|
|
535
|
+
f'\n video {i} ({videos[i]!r}): {x}x{y}.'
|
|
536
|
+
)
|
|
537
|
+
|
|
538
|
+
# ffmpeg -f concat needs an input file list
|
|
539
|
+
filelist_path = TempStore.create_path(extension='.txt')
|
|
540
|
+
with filelist_path.open('w', encoding='utf-8') as f:
|
|
541
|
+
for video in videos:
|
|
542
|
+
f.write(f'file {video!r}\n')
|
|
543
|
+
|
|
544
|
+
output_path = TempStore.create_path(extension='.mp4')
|
|
545
|
+
|
|
546
|
+
try:
|
|
547
|
+
# First attempt: fast copy without re-encoding (works for compatible formats)
|
|
548
|
+
cmd = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', str(filelist_path), '-c', 'copy', '-y', str(output_path)]
|
|
549
|
+
_logger.debug(f'concat_videos(): {" ".join(cmd)}')
|
|
550
|
+
try:
|
|
551
|
+
_ = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
552
|
+
return str(output_path)
|
|
553
|
+
except subprocess.CalledProcessError:
|
|
554
|
+
# Expected for mixed formats - continue to fallback
|
|
555
|
+
pass
|
|
556
|
+
|
|
557
|
+
# we might have some corrupted output
|
|
558
|
+
if output_path.exists():
|
|
559
|
+
output_path.unlink()
|
|
560
|
+
|
|
561
|
+
# general approach: re-encode with -f filter_complex
|
|
562
|
+
#
|
|
563
|
+
# example: 2 videos with audio:
|
|
564
|
+
# ffmpeg -i video1.mp4 -i video2.mp4
|
|
565
|
+
# -filter_complex "[0:v:0][1:v:0]concat=n=2:v=1:a=0[outv];[0:a:0][1:a:0]concat=n=2:v=0:a=1[outa]"
|
|
566
|
+
# -map "[outv]" -map "[outa]"
|
|
567
|
+
# ...
|
|
568
|
+
# breakdown:
|
|
569
|
+
# - [0:v:0][1:v:0] - video stream 0 from inputs 0 and 1
|
|
570
|
+
# - concat=n=2:v=1:a=0[outv] - concat 2 inputs, 1 video stream, 0 audio, output to [outv]
|
|
571
|
+
# - [0:a:0][1:a:0] - audio stream 0 from inputs 0 and 1
|
|
572
|
+
# - concat=n=2:v=0:a=1[outa] - concat 2 inputs, 0 video, 1 audio stream, output to [outa]
|
|
573
|
+
|
|
574
|
+
cmd = ['ffmpeg']
|
|
575
|
+
for video in videos:
|
|
576
|
+
cmd.extend(['-i', video])
|
|
577
|
+
|
|
578
|
+
all_have_audio = all(av_utils.has_audio_stream(str(video)) for video in videos)
|
|
579
|
+
video_inputs = ''.join([f'[{i}:v:0]' for i in range(len(videos))])
|
|
580
|
+
# concat video streams
|
|
581
|
+
filter_str = f'{video_inputs}concat=n={len(videos)}:v=1:a=0[outv]'
|
|
582
|
+
if all_have_audio:
|
|
583
|
+
# also concat audio streams
|
|
584
|
+
audio_inputs = ''.join([f'[{i}:a:0]' for i in range(len(videos))])
|
|
585
|
+
filter_str += f';{audio_inputs}concat=n={len(videos)}:v=0:a=1[outa]'
|
|
586
|
+
cmd.extend(['-filter_complex', filter_str, '-map', '[outv]'])
|
|
587
|
+
if all_have_audio:
|
|
588
|
+
cmd.extend(['-map', '[outa]'])
|
|
589
|
+
|
|
590
|
+
video_encoder = Env.get().default_video_encoder
|
|
591
|
+
if video_encoder is not None:
|
|
592
|
+
cmd.extend(['-c:v', video_encoder])
|
|
593
|
+
if all_have_audio:
|
|
594
|
+
cmd.extend(['-c:a', 'aac'])
|
|
595
|
+
cmd.extend(['-pix_fmt', 'yuv420p', str(output_path)])
|
|
596
|
+
|
|
597
|
+
_ = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
598
|
+
return str(output_path)
|
|
599
|
+
|
|
600
|
+
except subprocess.CalledProcessError as e:
|
|
601
|
+
_handle_ffmpeg_error(e)
|
|
602
|
+
finally:
|
|
603
|
+
filelist_path.unlink()
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
@pxt.udf
|
|
607
|
+
def with_audio(
|
|
608
|
+
video: pxt.Video,
|
|
609
|
+
audio: pxt.Audio,
|
|
610
|
+
*,
|
|
611
|
+
video_start_time: float = 0.0,
|
|
612
|
+
video_duration: float | None = None,
|
|
613
|
+
audio_start_time: float = 0.0,
|
|
614
|
+
audio_duration: float | None = None,
|
|
615
|
+
) -> pxt.Video:
|
|
616
|
+
"""
|
|
617
|
+
Creates a new video that combines the video stream from `video` and the audio stream from `audio`.
|
|
618
|
+
The `start_time` and `duration` parameters can be used to select a specific time range from each input.
|
|
619
|
+
If the audio input (or selected time range) is longer than the video, the audio will be truncated.
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
__Requirements:__
|
|
623
|
+
|
|
624
|
+
- `ffmpeg` needs to be installed and in PATH
|
|
625
|
+
|
|
626
|
+
Args:
|
|
627
|
+
video: Input video.
|
|
628
|
+
audio: Input audio.
|
|
629
|
+
video_start_time: Start time in the video input (in seconds).
|
|
630
|
+
video_duration: Duration of video segment (in seconds). If None, uses the remainder of the video after
|
|
631
|
+
`video_start_time`. `video_duration` determines the duration of the output video.
|
|
632
|
+
audio_start_time: Start time in the audio input (in seconds).
|
|
633
|
+
audio_duration: Duration of audio segment (in seconds). If None, uses the remainder of the audio after
|
|
634
|
+
`audio_start_time`. If the audio is longer than the output video, it will be truncated.
|
|
635
|
+
|
|
636
|
+
Returns:
|
|
637
|
+
A new video file with the audio track added.
|
|
638
|
+
|
|
639
|
+
Examples:
|
|
640
|
+
Add background music to a video:
|
|
641
|
+
|
|
642
|
+
>>> tbl.select(tbl.video.with_audio(tbl.music_track)).collect()
|
|
643
|
+
|
|
644
|
+
Add audio starting 5 seconds into both files:
|
|
645
|
+
|
|
646
|
+
>>> tbl.select(
|
|
647
|
+
... tbl.video.with_audio(
|
|
648
|
+
... tbl.music_track,
|
|
649
|
+
... video_start_time=5.0,
|
|
650
|
+
... audio_start_time=5.0
|
|
651
|
+
... )
|
|
652
|
+
... ).collect()
|
|
653
|
+
|
|
654
|
+
Use a 10-second clip from the middle of both files:
|
|
655
|
+
|
|
656
|
+
>>> tbl.select(
|
|
657
|
+
... tbl.video.with_audio(
|
|
658
|
+
... tbl.music_track,
|
|
659
|
+
... video_start_time=30.0,
|
|
660
|
+
... video_duration=10.0,
|
|
661
|
+
... audio_start_time=15.0,
|
|
662
|
+
... audio_duration=10.0
|
|
663
|
+
... )
|
|
664
|
+
... ).collect()
|
|
665
|
+
"""
|
|
666
|
+
Env.get().require_binary('ffmpeg')
|
|
667
|
+
if video_start_time < 0:
|
|
668
|
+
raise pxt.Error(f'video_offset must be non-negative, got {video_start_time}')
|
|
669
|
+
if audio_start_time < 0:
|
|
670
|
+
raise pxt.Error(f'audio_offset must be non-negative, got {audio_start_time}')
|
|
671
|
+
if video_duration is not None and video_duration <= 0:
|
|
672
|
+
raise pxt.Error(f'video_duration must be positive, got {video_duration}')
|
|
673
|
+
if audio_duration is not None and audio_duration <= 0:
|
|
674
|
+
raise pxt.Error(f'audio_duration must be positive, got {audio_duration}')
|
|
675
|
+
|
|
676
|
+
output_path = str(TempStore.create_path(extension='.mp4'))
|
|
677
|
+
|
|
678
|
+
cmd = ['ffmpeg']
|
|
679
|
+
if video_start_time > 0:
|
|
680
|
+
# fast seek, must precede -i
|
|
681
|
+
cmd.extend(['-ss', str(video_start_time)])
|
|
682
|
+
if video_duration is not None:
|
|
683
|
+
cmd.extend(['-t', str(video_duration)])
|
|
684
|
+
else:
|
|
685
|
+
video_duration = av_utils.get_video_duration(video)
|
|
686
|
+
cmd.extend(['-i', str(video)])
|
|
687
|
+
|
|
688
|
+
if audio_start_time > 0:
|
|
689
|
+
cmd.extend(['-ss', str(audio_start_time)])
|
|
690
|
+
if audio_duration is not None:
|
|
691
|
+
cmd.extend(['-t', str(audio_duration)])
|
|
692
|
+
cmd.extend(['-i', str(audio)])
|
|
693
|
+
|
|
694
|
+
cmd.extend(
|
|
695
|
+
[
|
|
696
|
+
'-map',
|
|
697
|
+
'0:v:0', # video from first input
|
|
698
|
+
'-map',
|
|
699
|
+
'1:a:0', # audio from second input
|
|
700
|
+
'-c:v',
|
|
701
|
+
'copy', # avoid re-encoding
|
|
702
|
+
'-c:a',
|
|
703
|
+
'copy', # avoid re-encoding
|
|
704
|
+
'-t',
|
|
705
|
+
str(video_duration), # limit output duration to video duration
|
|
706
|
+
'-loglevel',
|
|
707
|
+
'error', # only show errors
|
|
708
|
+
output_path,
|
|
709
|
+
]
|
|
710
|
+
)
|
|
711
|
+
|
|
712
|
+
_logger.debug(f'with_audio(): {" ".join(cmd)}')
|
|
713
|
+
|
|
714
|
+
try:
|
|
715
|
+
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
716
|
+
output_file = pathlib.Path(output_path)
|
|
717
|
+
if not output_file.exists() or output_file.stat().st_size == 0:
|
|
718
|
+
stderr_output = result.stderr.strip() if result.stderr is not None else ''
|
|
719
|
+
raise pxt.Error(f'ffmpeg failed to create output file for commandline: {" ".join(cmd)}\n{stderr_output}')
|
|
720
|
+
return output_path
|
|
721
|
+
except subprocess.CalledProcessError as e:
|
|
722
|
+
_handle_ffmpeg_error(e)
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
@pxt.udf(is_method=True)
|
|
726
|
+
def overlay_text(
|
|
727
|
+
video: pxt.Video,
|
|
728
|
+
text: str,
|
|
729
|
+
*,
|
|
730
|
+
font: str | None = None,
|
|
731
|
+
font_size: int = 24,
|
|
732
|
+
color: str = 'white',
|
|
733
|
+
opacity: float = 1.0,
|
|
734
|
+
horizontal_align: Literal['left', 'center', 'right'] = 'center',
|
|
735
|
+
horizontal_margin: int = 0,
|
|
736
|
+
vertical_align: Literal['top', 'center', 'bottom'] = 'center',
|
|
737
|
+
vertical_margin: int = 0,
|
|
738
|
+
box: bool = False,
|
|
739
|
+
box_color: str = 'black',
|
|
740
|
+
box_opacity: float = 1.0,
|
|
741
|
+
box_border: list[int] | None = None,
|
|
742
|
+
) -> pxt.Video:
|
|
743
|
+
"""
|
|
744
|
+
Overlay text on a video with customizable positioning and styling.
|
|
745
|
+
|
|
746
|
+
__Requirements:__
|
|
747
|
+
|
|
748
|
+
- `ffmpeg` needs to be installed and in PATH
|
|
749
|
+
|
|
750
|
+
Args:
|
|
751
|
+
video: Input video to overlay text on.
|
|
752
|
+
text: The text string to overlay on the video.
|
|
753
|
+
font: Font family or path to font file. If None, uses the system default.
|
|
754
|
+
font_size: Size of the text in points.
|
|
755
|
+
color: Text color (e.g., `'white'`, `'red'`, `'#FF0000'`).
|
|
756
|
+
opacity: Text opacity from 0.0 (transparent) to 1.0 (opaque).
|
|
757
|
+
horizontal_align: Horizontal text alignment (`'left'`, `'center'`, `'right'`).
|
|
758
|
+
horizontal_margin: Horizontal margin in pixels from the alignment edge.
|
|
759
|
+
vertical_align: Vertical text alignment (`'top'`, `'center'`, `'bottom'`).
|
|
760
|
+
vertical_margin: Vertical margin in pixels from the alignment edge.
|
|
761
|
+
box: Whether to draw a background box behind the text.
|
|
762
|
+
box_color: Background box color as a string.
|
|
763
|
+
box_opacity: Background box opacity from 0.0 to 1.0.
|
|
764
|
+
box_border: Padding around text in the box in pixels.
|
|
765
|
+
|
|
766
|
+
- `[10]`: 10 pixels on all sides
|
|
767
|
+
- `[10, 20]`: 10 pixels on top/bottom, 20 on left/right
|
|
768
|
+
- `[10, 20, 30]`: 10 pixels on top, 20 on left/right, 30 on bottom
|
|
769
|
+
- `[10, 20, 30, 40]`: 10 pixels on top, 20 on right, 30 on bottom, 40 on left
|
|
770
|
+
|
|
771
|
+
Returns:
|
|
772
|
+
A new video with the text overlay applied.
|
|
773
|
+
|
|
774
|
+
Examples:
|
|
775
|
+
Add a simple text overlay to videos in a table:
|
|
776
|
+
|
|
777
|
+
>>> tbl.select(tbl.video.overlay_text('Sample Text')).collect()
|
|
778
|
+
|
|
779
|
+
Add a YouTube-style caption:
|
|
780
|
+
|
|
781
|
+
>>> tbl.select(
|
|
782
|
+
... tbl.video.overlay_text(
|
|
783
|
+
... 'Caption text',
|
|
784
|
+
... font_size=32,
|
|
785
|
+
... color='white',
|
|
786
|
+
... opacity=1.0,
|
|
787
|
+
... box=True,
|
|
788
|
+
... box_color='black',
|
|
789
|
+
... box_opacity=0.8,
|
|
790
|
+
... box_border=[6, 14],
|
|
791
|
+
... horizontal_margin=10,
|
|
792
|
+
... vertical_align='bottom',
|
|
793
|
+
... vertical_margin=70
|
|
794
|
+
... )
|
|
795
|
+
... ).collect()
|
|
796
|
+
|
|
797
|
+
Add text with a semi-transparent background box:
|
|
798
|
+
|
|
799
|
+
>>> tbl.select(
|
|
800
|
+
... tbl.video.overlay_text(
|
|
801
|
+
... 'Important Message',
|
|
802
|
+
... font_size=32,
|
|
803
|
+
... color='yellow',
|
|
804
|
+
... box=True,
|
|
805
|
+
... box_color='black',
|
|
806
|
+
... box_opacity=0.6,
|
|
807
|
+
... box_border=[20, 10]
|
|
808
|
+
... )
|
|
809
|
+
... ).collect()
|
|
810
|
+
"""
|
|
811
|
+
Env.get().require_binary('ffmpeg')
|
|
812
|
+
if font_size <= 0:
|
|
813
|
+
raise pxt.Error(f'font_size must be positive, got {font_size}')
|
|
814
|
+
if opacity < 0.0 or opacity > 1.0:
|
|
815
|
+
raise pxt.Error(f'opacity must be between 0.0 and 1.0, got {opacity}')
|
|
816
|
+
if horizontal_margin < 0:
|
|
817
|
+
raise pxt.Error(f'horizontal_margin must be non-negative, got {horizontal_margin}')
|
|
818
|
+
if vertical_margin < 0:
|
|
819
|
+
raise pxt.Error(f'vertical_margin must be non-negative, got {vertical_margin}')
|
|
820
|
+
if box_opacity < 0.0 or box_opacity > 1.0:
|
|
821
|
+
raise pxt.Error(f'box_opacity must be between 0.0 and 1.0, got {box_opacity}')
|
|
822
|
+
if box_border is not None and not (
|
|
823
|
+
isinstance(box_border, (list, tuple))
|
|
824
|
+
and len(box_border) >= 1
|
|
825
|
+
and len(box_border) <= 4
|
|
826
|
+
and all(isinstance(x, int) for x in box_border)
|
|
827
|
+
and all(x >= 0 for x in box_border)
|
|
828
|
+
):
|
|
829
|
+
raise pxt.Error(f'box_border must be a list or tuple of 1-4 non-negative ints, got {box_border!s} instead')
|
|
830
|
+
|
|
831
|
+
output_path = str(TempStore.create_path(extension='.mp4'))
|
|
832
|
+
|
|
833
|
+
drawtext_params = _create_drawtext_params(
|
|
834
|
+
text,
|
|
835
|
+
font,
|
|
836
|
+
font_size,
|
|
837
|
+
color,
|
|
838
|
+
opacity,
|
|
839
|
+
horizontal_align,
|
|
840
|
+
horizontal_margin,
|
|
841
|
+
vertical_align,
|
|
842
|
+
vertical_margin,
|
|
843
|
+
box,
|
|
844
|
+
box_color,
|
|
845
|
+
box_opacity,
|
|
846
|
+
box_border,
|
|
847
|
+
)
|
|
848
|
+
|
|
849
|
+
cmd = [
|
|
850
|
+
'ffmpeg',
|
|
851
|
+
'-i',
|
|
852
|
+
str(video),
|
|
853
|
+
'-vf',
|
|
854
|
+
'drawtext=' + ':'.join(drawtext_params),
|
|
855
|
+
'-c:a',
|
|
856
|
+
'copy', # Copy audio stream unchanged
|
|
857
|
+
'-loglevel',
|
|
858
|
+
'error', # Only show errors
|
|
859
|
+
output_path,
|
|
860
|
+
]
|
|
861
|
+
_logger.debug(f'overlay_text(): {" ".join(cmd)}')
|
|
862
|
+
|
|
863
|
+
try:
|
|
864
|
+
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
865
|
+
output_file = pathlib.Path(output_path)
|
|
866
|
+
if not output_file.exists() or output_file.stat().st_size == 0:
|
|
867
|
+
stderr_output = result.stderr.strip() if result.stderr is not None else ''
|
|
868
|
+
raise pxt.Error(f'ffmpeg failed to create output file for commandline: {" ".join(cmd)}\n{stderr_output}')
|
|
869
|
+
return output_path
|
|
870
|
+
except subprocess.CalledProcessError as e:
|
|
871
|
+
_handle_ffmpeg_error(e)
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
def _create_drawtext_params(
|
|
875
|
+
text: str,
|
|
876
|
+
font: str | None,
|
|
877
|
+
font_size: int,
|
|
878
|
+
color: str,
|
|
879
|
+
opacity: float,
|
|
880
|
+
horizontal_align: str,
|
|
881
|
+
horizontal_margin: int,
|
|
882
|
+
vertical_align: str,
|
|
883
|
+
vertical_margin: int,
|
|
884
|
+
box: bool,
|
|
885
|
+
box_color: str,
|
|
886
|
+
box_opacity: float,
|
|
887
|
+
box_border: list[int] | None,
|
|
888
|
+
) -> list[str]:
|
|
889
|
+
"""Construct parameters for the ffmpeg drawtext filter"""
|
|
890
|
+
drawtext_params: list[str] = []
|
|
891
|
+
escaped_text = text.replace('\\', '\\\\').replace(':', '\\:').replace("'", "\\'")
|
|
892
|
+
drawtext_params.append(f"text='{escaped_text}'")
|
|
893
|
+
drawtext_params.append(f'fontsize={font_size}')
|
|
894
|
+
|
|
895
|
+
if font is not None:
|
|
896
|
+
if pathlib.Path(font).exists():
|
|
897
|
+
drawtext_params.append(f"fontfile='{font}'")
|
|
898
|
+
else:
|
|
899
|
+
drawtext_params.append(f"font='{font}'")
|
|
900
|
+
if opacity < 1.0:
|
|
901
|
+
drawtext_params.append(f'fontcolor={color}@{opacity}')
|
|
902
|
+
else:
|
|
903
|
+
drawtext_params.append(f'fontcolor={color}')
|
|
904
|
+
|
|
905
|
+
if horizontal_align == 'left':
|
|
906
|
+
x_expr = str(horizontal_margin)
|
|
907
|
+
elif horizontal_align == 'center':
|
|
908
|
+
x_expr = '(w-text_w)/2'
|
|
909
|
+
else: # right
|
|
910
|
+
x_expr = f'w-text_w-{horizontal_margin}' if horizontal_margin != 0 else 'w-text_w'
|
|
911
|
+
if vertical_align == 'top':
|
|
912
|
+
y_expr = str(vertical_margin)
|
|
913
|
+
elif vertical_align == 'center':
|
|
914
|
+
y_expr = '(h-text_h)/2'
|
|
915
|
+
else: # bottom
|
|
916
|
+
y_expr = f'h-text_h-{vertical_margin}' if vertical_margin != 0 else 'h-text_h'
|
|
917
|
+
drawtext_params.extend([f'x={x_expr}', f'y={y_expr}'])
|
|
918
|
+
|
|
919
|
+
if box:
|
|
920
|
+
drawtext_params.append('box=1')
|
|
921
|
+
if box_opacity < 1.0:
|
|
922
|
+
drawtext_params.append(f'boxcolor={box_color}@{box_opacity}')
|
|
923
|
+
else:
|
|
924
|
+
drawtext_params.append(f'boxcolor={box_color}')
|
|
925
|
+
if box_border is not None:
|
|
926
|
+
drawtext_params.append(f'boxborderw={"|".join(map(str, box_border))}')
|
|
927
|
+
|
|
928
|
+
return drawtext_params
|
|
929
|
+
|
|
930
|
+
|
|
931
|
+
@pxt.udf(is_method=True)
|
|
932
|
+
def scene_detect_adaptive(
|
|
933
|
+
video: pxt.Video,
|
|
934
|
+
*,
|
|
935
|
+
fps: float | None = None,
|
|
936
|
+
adaptive_threshold: float = 3.0,
|
|
937
|
+
min_scene_len: int = 15,
|
|
938
|
+
window_width: int = 2,
|
|
939
|
+
min_content_val: float = 15.0,
|
|
940
|
+
delta_hue: float = 1.0,
|
|
941
|
+
delta_sat: float = 1.0,
|
|
942
|
+
delta_lum: float = 1.0,
|
|
943
|
+
delta_edges: float = 0.0,
|
|
944
|
+
luma_only: bool = False,
|
|
945
|
+
kernel_size: int | None = None,
|
|
946
|
+
) -> list[dict]:
|
|
947
|
+
"""
|
|
948
|
+
+    Detect scene cuts in a video using PySceneDetect's
+    [AdaptiveDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.adaptive_detector.AdaptiveDetector).
+
+    __Requirements:__
+
+    - `pip install scenedetect`
+
+    Args:
+        video: The video to analyze for scene cuts.
+        fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+            Lower values process faster but may miss exact scene cuts.
+        adaptive_threshold: Threshold that the score ratio must exceed to trigger a new scene cut.
+            Lower values will detect more scenes (more sensitive), higher values will detect fewer scenes.
+        min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to the scene
+            list.
+        window_width: Size of window (number of frames) before and after each frame to average together in order to
+            detect deviations from the mean. Must be at least 1.
+        min_content_val: Minimum threshold (float) that the content_val must exceed in order to register as a new scene.
+            This is calculated the same way that `scene_detect_content()` calculates frame
+            score based on weights/luma_only/kernel_size.
+        delta_hue: Weight for hue component changes. Higher values make hue changes more important.
+        delta_sat: Weight for saturation component changes. Higher values make saturation changes more important.
+        delta_lum: Weight for luminance component changes. Higher values make brightness changes more important.
+        delta_edges: Weight for edge detection changes. Higher values make edge changes more important.
+            Edge detection can help detect cuts in scenes with similar colors but different content.
+        luma_only: If True, only analyzes changes in the luminance (brightness) channel of the video,
+            ignoring color information. This can be faster and may work better for grayscale content.
+        kernel_size: Size of kernel to use for post edge detection filtering. If None, automatically set based on video
+            resolution.
+
+    Returns:
+        A list of dictionaries, one for each detected scene, with the following keys:
+
+        - `start_time` (float): The start time of the scene in seconds.
+        - `start_pts` (int): The pts of the start of the scene.
+        - `duration` (float): The duration of the scene in seconds.
+
+        The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+    Examples:
+        Detect scene cuts with default parameters:
+
+        >>> tbl.select(tbl.video.scene_detect_adaptive()).collect()
+
+        Detect more scenes by lowering the threshold:
+
+        >>> tbl.select(tbl.video.scene_detect_adaptive(adaptive_threshold=1.5)).collect()
+
+        Use luminance-only detection with a longer minimum scene length:
+
+        >>> tbl.select(
+        ...     tbl.video.scene_detect_adaptive(
+        ...         luma_only=True,
+        ...         min_scene_len=30
+        ...     )
+        ... ).collect()
+
+        Add scene cuts as a computed column:
+
+        >>> tbl.add_computed_column(
+        ...     scene_cuts=tbl.video.scene_detect_adaptive(adaptive_threshold=2.0)
+        ... )
+
+        Analyze at a lower frame rate for faster processing:
+
+        >>> tbl.select(tbl.video.scene_detect_adaptive(fps=2.0)).collect()
+    """
+    Env.get().require_package('scenedetect')
+    from scenedetect.detectors import AdaptiveDetector, ContentDetector
+
+    weights = ContentDetector.Components(
+        delta_hue=delta_hue, delta_sat=delta_sat, delta_lum=delta_lum, delta_edges=delta_edges
+    )
+    try:
+        detector = AdaptiveDetector(
+            adaptive_threshold=adaptive_threshold,
+            min_scene_len=min_scene_len,
+            window_width=window_width,
+            min_content_val=min_content_val,
+            weights=weights,
+            luma_only=luma_only,
+            kernel_size=kernel_size,
+        )
+        return _scene_detect(video, fps, detector)
+    except Exception as e:
+        raise pxt.Error(f'scene_detect_adaptive(): failed to detect scenes: {e}') from e
+
+
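The scene dictionaries returned by this UDF are plain JSON values, so query results can be post-processed in ordinary Python. A minimal sketch (the table name `films` is hypothetical; the `scenes` output name is chosen via the keyword form of `select()`):

```python
import pixeltable as pxt

tbl = pxt.get_table('films')  # hypothetical table with a `video` column
rows = tbl.select(scenes=tbl.video.scene_detect_adaptive()).collect()
for row in rows:
    for scene in row['scenes']:
        # each scene dict carries start_time, start_pts, and duration
        end_time = scene['start_time'] + scene['duration']
        print(f'{scene["start_time"]:.2f}s - {end_time:.2f}s')
```
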
+@pxt.udf(is_method=True)
+def scene_detect_content(
+    video: pxt.Video,
+    *,
+    fps: float | None = None,
+    threshold: float = 27.0,
+    min_scene_len: int = 15,
+    delta_hue: float = 1.0,
+    delta_sat: float = 1.0,
+    delta_lum: float = 1.0,
+    delta_edges: float = 0.0,
+    luma_only: bool = False,
+    kernel_size: int | None = None,
+    filter_mode: Literal['merge', 'suppress'] = 'merge',
+) -> list[dict]:
+    """
+    Detect scene cuts in a video using PySceneDetect's
+    [ContentDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.content_detector.ContentDetector).
+
+    __Requirements:__
+
+    - `pip install scenedetect`
+
+    Args:
+        video: The video to analyze for scene cuts.
+        fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+            Lower values process faster but may miss exact scene cuts.
+        threshold: Threshold that the weighted sum of component changes must exceed to trigger a scene cut.
+            Lower values detect more scenes (more sensitive), higher values detect fewer scenes.
+        min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to the scene
+            list.
+        delta_hue: Weight for hue component changes. Higher values make hue changes more important.
+        delta_sat: Weight for saturation component changes. Higher values make saturation changes more important.
+        delta_lum: Weight for luminance component changes. Higher values make brightness changes more important.
+        delta_edges: Weight for edge detection changes. Higher values make edge changes more important.
+            Edge detection can help detect cuts in scenes with similar colors but different content.
+        luma_only: If True, only analyzes changes in the luminance (brightness) channel,
+            ignoring color information. This can be faster and may work better for grayscale content.
+        kernel_size: Size of kernel for expanding detected edges. Must be an odd integer greater than or equal to 3.
+            If None, automatically set using video resolution.
+        filter_mode: How to handle fast cuts/flashes. 'merge' combines quick cuts, 'suppress' filters them out.
+
+    Returns:
+        A list of dictionaries, one for each detected scene, with the following keys:
+
+        - `start_time` (float): The start time of the scene in seconds.
+        - `start_pts` (int): The pts of the start of the scene.
+        - `duration` (float): The duration of the scene in seconds.
+
+        The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+    Examples:
+        Detect scene cuts with default parameters:
+
+        >>> tbl.select(tbl.video.scene_detect_content()).collect()
+
+        Detect more scenes by lowering the threshold:
+
+        >>> tbl.select(tbl.video.scene_detect_content(threshold=15.0)).collect()
+
+        Use luminance-only detection:
+
+        >>> tbl.select(tbl.video.scene_detect_content(luma_only=True)).collect()
+
+        Emphasize edge detection for scenes with similar colors:
+
+        >>> tbl.select(
+        ...     tbl.video.scene_detect_content(
+        ...         delta_edges=1.0,
+        ...         delta_hue=0.5,
+        ...         delta_sat=0.5
+        ...     )
+        ... ).collect()
+
+        Add scene cuts as a computed column:
+
+        >>> tbl.add_computed_column(
+        ...     scene_cuts=tbl.video.scene_detect_content(threshold=20.0)
+        ... )
+    """
+    Env.get().require_package('scenedetect')
+    from scenedetect.detectors import ContentDetector
+    from scenedetect.detectors.content_detector import FlashFilter  # type: ignore[import-untyped]
+
+    weights = ContentDetector.Components(
+        delta_hue=delta_hue, delta_sat=delta_sat, delta_lum=delta_lum, delta_edges=delta_edges
+    )
+    filter_mode_enum = FlashFilter.Mode.MERGE if filter_mode == 'merge' else FlashFilter.Mode.SUPPRESS
+
+    try:
+        detector = ContentDetector(
+            threshold=threshold,
+            min_scene_len=min_scene_len,
+            weights=weights,
+            luma_only=luma_only,
+            kernel_size=kernel_size,
+            filter_mode=filter_mode_enum,
         )
+        return _scene_detect(video, fps, detector)
+    except Exception as e:
+        raise pxt.Error(f'scene_detect_content(): failed to detect scenes: {e}') from e
+
+
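The threshold above is compared against a per-frame score that the detector derives from the component weights. Conceptually, the score is a weighted average of the per-component deltas between consecutive frames; the sketch below illustrates the weighting scheme and is not PySceneDetect's actual implementation:

```python
def frame_score(deltas: dict[str, float], weights: dict[str, float]) -> float:
    # weighted average of per-component deltas (hue, sat, lum, edges);
    # a cut fires when this score exceeds `threshold`
    total = sum(weights.values())
    return sum(deltas[name] * weights[name] for name in weights) / total

# with the default weights (delta_edges=0.0), edge changes do not contribute:
score = frame_score(
    {'hue': 10.0, 'sat': 5.0, 'lum': 40.0, 'edges': 80.0},
    {'hue': 1.0, 'sat': 1.0, 'lum': 1.0, 'edges': 0.0},
)
assert abs(score - 55.0 / 3.0) < 1e-9
```
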
+@pxt.udf(is_method=True)
+def scene_detect_threshold(
+    video: pxt.Video,
+    *,
+    fps: float | None = None,
+    threshold: float = 12.0,
+    min_scene_len: int = 15,
+    fade_bias: float = 0.0,
+    add_final_scene: bool = False,
+    method: Literal['ceiling', 'floor'] = 'floor',
+) -> list[dict]:
+    """
+    Detect fade-in and fade-out transitions in a video using PySceneDetect's
+    [ThresholdDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.threshold_detector.ThresholdDetector).
+
+    ThresholdDetector identifies scenes by detecting when pixel brightness falls below or rises above
+    a threshold value, suitable for detecting fade-to-black, fade-to-white, and similar transitions.
+
+    __Requirements:__
+
+    - `pip install scenedetect`
+
+    Args:
+        video: The video to analyze for fade transitions.
+        fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+            Lower values process faster but may miss exact transition points.
+        threshold: 8-bit intensity value that each pixel value (R, G, and B) must be less than or equal to in order
+            to trigger a fade in/out.
+        min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to the scene
+            list.
+        fade_bias: Float between -1.0 and +1.0 representing the percentage of timecode skew for the start of a scene
+            (-1.0 causing a cut at the fade-to-black, 0.0 in the middle, and +1.0 causing the cut to be right at the
+            position where the threshold is passed).
+        add_final_scene: If True and the video ends on a fade-out, generate an additional scene at that
+            timecode.
+        method: How to treat the threshold when detecting fade events:
+
+            - 'ceiling': Fade out happens when frame brightness rises above threshold.
+            - 'floor': Fade out happens when frame brightness falls below threshold.
+
+    Returns:
+        A list of dictionaries, one for each detected scene, with the following keys:
+
+        - `start_time` (float): The start time of the scene in seconds.
+        - `start_pts` (int): The pts of the start of the scene.
+        - `duration` (float): The duration of the scene in seconds.
+
+        The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+    Examples:
+        Detect fade-to-black transitions with default parameters:
+
+        >>> tbl.select(tbl.video.scene_detect_threshold()).collect()
+
+        Use a lower threshold to detect darker fades:
+
+        >>> tbl.select(tbl.video.scene_detect_threshold(threshold=8.0)).collect()
+
+        Detect fade-to-white transitions using the ceiling method:
+
+        >>> tbl.select(tbl.video.scene_detect_threshold(method='ceiling')).collect()
+
+        Add final scene boundary:
+
+        >>> tbl.select(
+        ...     tbl.video.scene_detect_threshold(
+        ...         add_final_scene=True
+        ...     )
+        ... ).collect()
+
+        Add fade transitions as a computed column:
+
+        >>> tbl.add_computed_column(
+        ...     fade_cuts=tbl.video.scene_detect_threshold(threshold=15.0)
+        ... )
+    """
+    Env.get().require_package('scenedetect')
+    from scenedetect.detectors import ThresholdDetector
+
+    method_enum = ThresholdDetector.Method.FLOOR if method == 'floor' else ThresholdDetector.Method.CEILING
+    try:
+        detector = ThresholdDetector(
+            threshold=threshold,
+            min_scene_len=min_scene_len,
+            fade_bias=fade_bias,
+            add_final_scene=add_final_scene,
+            method=method_enum,
+        )
+        return _scene_detect(video, fps, detector)
+    except Exception as e:
+        raise pxt.Error(f'scene_detect_threshold(): failed to detect scenes: {e}') from e
+
+
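`fade_bias` is easiest to read as a linear interpolation between two candidate cut positions. The sketch below mirrors the docstring's description (it is an illustration, not ThresholdDetector's internals):

```python
def biased_cut_time(fade_start: float, fade_end: float, fade_bias: float) -> float:
    # fade_bias == -1.0 -> cut at the start of the fade (the fade-to-black point)
    # fade_bias ==  0.0 -> cut in the middle
    # fade_bias == +1.0 -> cut where brightness re-crosses the threshold
    mid = (fade_start + fade_end) / 2.0
    half_span = (fade_end - fade_start) / 2.0
    return mid + fade_bias * half_span

assert biased_cut_time(10.0, 12.0, -1.0) == 10.0
assert biased_cut_time(10.0, 12.0, 0.0) == 11.0
assert biased_cut_time(10.0, 12.0, 1.0) == 12.0
```
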
+@pxt.udf(is_method=True)
+def scene_detect_histogram(
+    video: pxt.Video, *, fps: float | None = None, threshold: float = 0.05, bins: int = 256, min_scene_len: int = 15
+) -> list[dict]:
+    """
+    Detect scene cuts in a video using PySceneDetect's
+    [HistogramDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.histogram_detector.HistogramDetector).
+
+    HistogramDetector compares frame histograms on the Y (luminance) channel after YUV conversion.
+    It detects scenes based on relative histogram differences and is more robust to gradual lighting
+    changes than content-based detection.
+
+    __Requirements:__
+
+    - `pip install scenedetect`
+
+    Args:
+        video: The video to analyze for scene cuts.
+        fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+            Lower values process faster but may miss exact scene cuts.
+        threshold: Maximum relative difference, between 0.0 and 1.0, by which the histograms may differ. Histograms are
+            calculated on the Y channel after converting the frame to YUV, and normalized based on the number of bins.
+            Higher differences imply greater change in content, so larger threshold values are less sensitive to cuts.
+            Lower values detect more scenes (more sensitive), higher values detect fewer scenes.
+        bins: Number of bins to use for histogram calculation (typically 16-256). More bins provide
+            finer granularity but may be more sensitive to noise.
+        min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to the scene
+            list.
+
+    Returns:
+        A list of dictionaries, one for each detected scene, with the following keys:
+
+        - `start_time` (float): The start time of the scene in seconds.
+        - `start_pts` (int): The pts of the start of the scene.
+        - `duration` (float): The duration of the scene in seconds.
+
+        The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+    Examples:
+        Detect scene cuts with default parameters:
+
+        >>> tbl.select(tbl.video.scene_detect_histogram()).collect()
+
+        Detect more scenes by lowering the threshold:
+
+        >>> tbl.select(tbl.video.scene_detect_histogram(threshold=0.03)).collect()
+
+        Use fewer bins for faster processing:
+
+        >>> tbl.select(tbl.video.scene_detect_histogram(bins=64)).collect()
+
+        Use with a longer minimum scene length:
+
+        >>> tbl.select(
+        ...     tbl.video.scene_detect_histogram(
+        ...         min_scene_len=30
+        ...     )
+        ... ).collect()
+
+        Add scene cuts as a computed column:
+
+        >>> tbl.add_computed_column(
+        ...     scene_cuts=tbl.video.scene_detect_histogram(threshold=0.04)
+        ... )
+    """
+    Env.get().require_package('scenedetect')
+    from scenedetect.detectors import HistogramDetector
+
+    try:
+        detector = HistogramDetector(threshold=threshold, bins=bins, min_scene_len=min_scene_len)
+        return _scene_detect(video, fps, detector)
+    except Exception as e:
+        raise pxt.Error(f'scene_detect_histogram(): failed to detect scenes: {e}') from e
+
+
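For intuition, the relative difference that `threshold` is compared against can be pictured as a normalized histogram distance on the luminance channel. One plausible formulation is sketched below; it illustrates the idea only and is not HistogramDetector's actual code:

```python
import numpy as np

def luma_hist_difference(y_prev: np.ndarray, y_curr: np.ndarray, bins: int = 256) -> float:
    # normalized histograms over 8-bit luminance values
    h1, _ = np.histogram(y_prev, bins=bins, range=(0, 256))
    h2, _ = np.histogram(y_curr, bins=bins, range=(0, 256))
    h1 = h1 / max(h1.sum(), 1)
    h2 = h2 / max(h2.sum(), 1)
    # half the L1 distance lies in [0.0, 1.0]: 0.0 identical, 1.0 disjoint
    return float(np.abs(h1 - h2).sum() / 2.0)
```
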
+@pxt.udf(is_method=True)
+def scene_detect_hash(
+    video: pxt.Video,
+    *,
+    fps: float | None = None,
+    threshold: float = 0.395,
+    size: int = 16,
+    lowpass: int = 2,
+    min_scene_len: int = 15,
+) -> list[dict]:
+    """
+    Detect scene cuts in a video using PySceneDetect's
+    [HashDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.hash_detector.HashDetector).
+
+    HashDetector uses perceptual hashing for very fast scene detection. It computes a hash of each
+    frame at reduced resolution and compares hash distances.
+
+    __Requirements:__
+
+    - `pip install scenedetect`
+
+    Args:
+        video: The video to analyze for scene cuts.
+        fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+            Lower values process faster but may miss exact scene cuts.
+        threshold: Value between 0.0 and 1.0 representing the relative Hamming distance between the perceptual hashes
+            of adjacent frames. A distance of 0 means the frames are identical, and 1 means no correlation. Smaller
+            threshold values thus require more correlation, making the detector more sensitive. The Hamming distance
+            is divided by size x size for normalization before comparing to the threshold.
+            Lower values detect more scenes (more sensitive), higher values detect fewer scenes.
+        size: Size of the square of low-frequency data to use for the DCT. Larger values are more precise but slower.
+            Common values are 8, 16, or 32.
+        lowpass: How much high-frequency information to filter from the DCT. A value of 2 means keep the lower 1/2 of
+            the frequency data, 4 means only keep 1/4, etc. Larger values make the
+            detector less sensitive to high-frequency details and noise.
+        min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to the scene
+            list.
+
+    Returns:
+        A list of dictionaries, one for each detected scene, with the following keys:
+
+        - `start_time` (float): The start time of the scene in seconds.
+        - `start_pts` (int): The pts of the start of the scene.
+        - `duration` (float): The duration of the scene in seconds.
+
+        The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+    Examples:
+        Detect scene cuts with default parameters:
+
+        >>> tbl.select(tbl.video.scene_detect_hash()).collect()
+
+        Detect more scenes by lowering the threshold:
+
+        >>> tbl.select(tbl.video.scene_detect_hash(threshold=0.3)).collect()
+
+        Use larger hash size for more precision:
+
+        >>> tbl.select(tbl.video.scene_detect_hash(size=32)).collect()
+
+        Use for fast processing with lower frame rate:
+
+        >>> tbl.select(
+        ...     tbl.video.scene_detect_hash(
+        ...         fps=1.0,
+        ...         threshold=0.4
+        ...     )
+        ... ).collect()
+
+        Add scene cuts as a computed column:
+
+        >>> tbl.add_computed_column(
+        ...     scene_cuts=tbl.video.scene_detect_hash()
+        ... )
+    """
+    Env.get().require_package('scenedetect')
+    from scenedetect.detectors import HashDetector
+
+    try:
+        detector = HashDetector(threshold=threshold, size=size, lowpass=lowpass, min_scene_len=min_scene_len)
+        return _scene_detect(video, fps, detector)
+    except Exception as e:
+        raise pxt.Error(f'scene_detect_hash(): failed to detect scenes: {e}') from e
+
+
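The normalization described above is simple to state concretely. A minimal sketch, assuming size x size bit hashes as in the docstring (illustrative, not HashDetector's code):

```python
import numpy as np

def relative_hash_distance(hash_a: np.ndarray, hash_b: np.ndarray, size: int = 16) -> float:
    # hash_a and hash_b are boolean arrays of shape (size, size); the raw
    # Hamming distance is divided by size * size, yielding a value in [0.0, 1.0]
    return float(np.count_nonzero(hash_a != hash_b)) / (size * size)

# a cut would fire when relative_hash_distance(...) exceeds `threshold` (default 0.395)
```
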
+class _SceneDetectFrameInfo(NamedTuple):
+    frame_idx: int
+    frame_pts: int
+    frame_time: float
+
+
+def _scene_detect(video: str, fps: float | None, detector: 'SceneDetector') -> list[dict[str, int | float]]:
+    from scenedetect import FrameTimecode  # type: ignore[import-untyped]
+
+    with av_utils.VideoFrames(pathlib.Path(video), fps=fps) as frame_iter:
+        video_fps = float(frame_iter.video_framerate)
+
+        scenes: list[dict[str, int | float]] = []
+        frame_idx: int | None = None
+        start_time: float | None = None  # of current scene
+        start_pts: int | None = None  # of current scene
+
+        # in order to determine the cut frame times, we need to record frame times (chronologically) and look them
+        # up by index; trying to derive frame times from frame indices isn't possible due to variable frame rates
+        frame_info: list[_SceneDetectFrameInfo] = []
+
+        def process_cuts(cuts: list[FrameTimecode]) -> None:
+            nonlocal frame_info, start_time, start_pts
+            for cut_timecode in cuts:
+                cut_frame_idx = cut_timecode.get_frames()
+                # we expect cuts to come back in chronological order
+                assert cut_frame_idx >= frame_info[0].frame_idx
+                info_offset = next((i for i, info in enumerate(frame_info) if info.frame_idx == cut_frame_idx), None)
+                assert info_offset is not None  # the cut is at a previously reported frame idx
+                info = frame_info[info_offset]
+                scenes.append(
+                    {'start_time': start_time, 'start_pts': start_pts, 'duration': info.frame_time - start_time}
+                )
+                start_time = info.frame_time
+                start_pts = info.frame_pts
+                frame_info = frame_info[info_offset + 1 :]
+
+        for item in frame_iter:
+            if start_time is None:
+                start_time = item.time
+                start_pts = item.pts
+            frame_info.append(_SceneDetectFrameInfo(item.frame_idx, item.pts, item.time))
+            frame_array = np.array(item.frame.convert('RGB'))
+            frame_idx = item.frame_idx
+            timecode = FrameTimecode(item.frame_idx, video_fps)
+            cuts = detector.process_frame(timecode, frame_array)
+            process_cuts(cuts)
+
+        # Post-process to capture any final scene cuts
+        if frame_idx is not None:
+            final_timecode = FrameTimecode(frame_idx, video_fps)
+            final_cuts = detector.post_process(final_timecode)
+            process_cuts(final_cuts)
+
+        # if we didn't detect any cuts but the video has content, add the full video as a single scene
+        if len(scenes) == 0 and len(frame_info) > 0:
+            scenes.append(
+                {
+                    'start_time': start_time,
+                    'start_pts': start_pts,
+                    'duration': frame_info[-1].frame_time - start_time,
+                }
+            )
+
+        return scenes
+
+
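`_scene_detect` relies on a small slice of the PySceneDetect detector interface: `process_frame()` may report zero or more cuts per frame, and `post_process()` may emit trailing cuts once the stream ends. A hypothetical stand-in detector makes that contract concrete (for illustration; real detectors come from `scenedetect.detectors`):

```python
class _EveryNFramesDetector:
    """Hypothetical detector that reports a cut every `n` frames."""

    def __init__(self, n: int = 100) -> None:
        self.n = n

    def process_frame(self, timecode: 'FrameTimecode', frame_array: 'np.ndarray') -> list['FrameTimecode']:
        # zero or one cut per frame, always at a previously seen frame index
        idx = timecode.get_frames()
        return [timecode] if idx > 0 and idx % self.n == 0 else []

    def post_process(self, timecode: 'FrameTimecode') -> list['FrameTimecode']:
        return []  # nothing buffered, so no trailing cuts
```
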
+def frame_iterator(
+    video: Any,
+    *,
+    fps: float | None = None,
+    num_frames: int | None = None,
+    keyframes_only: bool = False,
+    all_frame_attrs: bool = False,
+) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
+    """
+    Iterator over frames of a video. At most one of `fps`, `num_frames`, or `keyframes_only` may be specified. If
+    `fps` is specified, then frames will be extracted at the specified rate (frames per second). If `num_frames` is
+    specified, then that exact number of frames will be extracted. If none of them is specified, then all frames will
+    be extracted. The first frame of the video will always be extracted, and the remaining frames will be spaced as
+    evenly as possible.
+
+    Args:
+        fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
+            If omitted or set to 0.0, or if greater than the native framerate of the video,
+            then the framerate of the video will be used (all frames will be extracted).
+        num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
+            `num_frames` is greater than the number of frames in the video, all frames will be extracted.
+        keyframes_only: If True, only extract keyframes.
+        all_frame_attrs:
+            If True, outputs a `pxt.Json` column `frame_attrs` with the following `pyav`-provided attributes
+            (for more information, see `pyav`'s documentation on
+            [VideoFrame](https://pyav.org/docs/develop/api/video.html#module-av.video.frame) and
+            [Frame](https://pyav.org/docs/develop/api/frame.html)):
+
+            * `index` (`int`)
+            * `pts` (`int | None`)
+            * `dts` (`int | None`)
+            * `time` (`float | None`)
+            * `is_corrupt` (`bool`)
+            * `key_frame` (`bool`)
+            * `pict_type` (`int`)
+            * `interlaced_frame` (`bool`)
+
+            If False, only outputs frame attributes `frame_idx`, `pos_msec`, and `pos_frame` as separate columns.
+
+    Examples:
+        All these examples assume an existing table `tbl` with a column `video` of type `pxt.Video`.
+
+        Create a view that extracts all frames from all videos:
+
+        >>> pxt.create_view('all_frames', tbl, iterator=frame_iterator(tbl.video))
+
+        Create a view that extracts only keyframes from all videos:
+
+        >>> pxt.create_view('keyframes', tbl, iterator=frame_iterator(tbl.video, keyframes_only=True))
+
+        Create a view that extracts frames from all videos at a rate of 1 frame per second:
+
+        >>> pxt.create_view('one_fps_frames', tbl, iterator=frame_iterator(tbl.video, fps=1.0))
+
+        Create a view that extracts exactly 10 frames from each video:
+
+        >>> pxt.create_view('ten_frames', tbl, iterator=frame_iterator(tbl.video, num_frames=10))
+    """
+    kwargs: dict[str, Any] = {}
+    if fps is not None:
+        kwargs['fps'] = fps
+    if num_frames is not None:
+        kwargs['num_frames'] = num_frames
+    if keyframes_only:
+        kwargs['keyframes_only'] = keyframes_only
+    if all_frame_attrs:
+        kwargs['all_frame_attrs'] = all_frame_attrs
+
+    return pxt.iterators.video.FrameIterator._create(video=video, **kwargs)
+
+
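Once such a view exists, it can be queried like any other table. A brief follow-on sketch (assuming the `one_fps_frames` view from the example above, and the `frame` image column plus the `pos_msec` attribute column that FrameIterator exposes):

```python
frames = pxt.create_view('one_fps_frames', tbl, iterator=frame_iterator(tbl.video, fps=1.0))
# select each extracted frame together with its position in the video
frames.select(frames.frame, frames.pos_msec).collect()
```
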
+def video_splitter(
+    video: Any,
+    *,
+    duration: float | None = None,
+    overlap: float | None = None,
+    min_segment_duration: float | None = None,
+    segment_times: list[float] | None = None,
+    mode: Literal['fast', 'accurate'] = 'accurate',
+    video_encoder: str | None = None,
+    video_encoder_args: dict[str, Any] | None = None,
+) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
+    """
+    Iterator over segments of a video file. The segments are specified either via a
+    fixed duration or a list of split points.
+
+    Args:
+        duration: Video segment duration in seconds.
+        overlap: Overlap between consecutive segments in seconds. Only available for `mode='fast'`.
+        min_segment_duration: Drop the last segment if it is shorter than min_segment_duration.
+        segment_times: List of timestamps (in seconds) in video where segments should be split. Note that these are not
+            segment durations. If all segment times are less than the duration of the video, produces exactly
+            `len(segment_times) + 1` segments. An argument of `[]` will produce a single segment containing the
+            entire video.
+        mode: Segmentation mode:
+
+            - `'fast'`: Quick segmentation using stream copy (splits only at keyframes, approximate durations)
+            - `'accurate'`: Precise segmentation with re-encoding (exact durations, slower)
+        video_encoder: Video encoder to use. If not specified, uses the default encoder for the current platform.
+            Only available for `mode='accurate'`.
+        video_encoder_args: Additional arguments to pass to the video encoder. Only available for `mode='accurate'`.
+
+    Examples:
+        All these examples assume an existing table `tbl` with a column `video` of type `pxt.Video`.
+
+        Create a view that splits each video into 10-second segments:
+
+        >>> pxt.create_view('ten_second_segments', tbl, iterator=video_splitter(tbl.video, duration=10.0))
+
+        Create a view that splits each video into segments at specified fixed times:
+
+        >>> split_times = [5.0, 15.0, 30.0]
+        >>> pxt.create_view('custom_segments', tbl, iterator=video_splitter(tbl.video, segment_times=split_times))
+
+        Create a view that splits each video into segments at times specified by a column `split_times` of type
+        `pxt.Json`, containing a list of timestamps in seconds:
+
+        >>> pxt.create_view('custom_segments', tbl, iterator=video_splitter(tbl.video, segment_times=tbl.split_times))
+    """
+    kwargs: dict[str, Any] = {}
+    if duration is not None:
+        kwargs['duration'] = duration
+    if overlap is not None:
+        kwargs['overlap'] = overlap
+    if min_segment_duration is not None:
+        kwargs['min_segment_duration'] = min_segment_duration
+    if segment_times is not None:
+        kwargs['segment_times'] = segment_times
+    if mode != 'accurate':
+        kwargs['mode'] = mode
+    if video_encoder is not None:
+        kwargs['video_encoder'] = video_encoder
+    if video_encoder_args is not None:
+        kwargs['video_encoder_args'] = video_encoder_args

-    return
+    return pxt.iterators.video.VideoSplitter._create(video=video, **kwargs)


 __all__ = local_public_names(__name__)


-def __dir__():
+def __dir__() -> list[str]:
     return __all__
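
The scene detectors and `video_splitter` compose naturally: scene starts reported by any `scene_detect_*` UDF can feed the `segment_times` argument, for instance via a `pxt.Json` column as in the last docstring example. A hedged sketch of the derivation, using made-up scene values:

```python
# one video's scene list, as returned by any of the scene_detect_* UDFs
# (the numbers here are illustrative only)
scenes = [
    {'start_time': 0.0, 'start_pts': 0, 'duration': 12.5},
    {'start_time': 12.5, 'start_pts': 160000, 'duration': 30.1},
    {'start_time': 42.6, 'start_pts': 545280, 'duration': 17.4},
]
# split points for video_splitter are the scene starts after the first
# (the first scene always begins at the start of the video)
split_times = [s['start_time'] for s in scenes[1:]]
assert split_times == [12.5, 42.6]
```

Stored in a `pxt.Json` column, such a list can then be passed as `segment_times=tbl.split_times`, producing one segment per detected scene.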
|