pixeltable 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (36)
  1. pixeltable/_version.py +1 -1
  2. pixeltable/catalog/catalog.py +76 -50
  3. pixeltable/catalog/column.py +29 -16
  4. pixeltable/catalog/insertable_table.py +2 -2
  5. pixeltable/catalog/path.py +4 -10
  6. pixeltable/catalog/table.py +51 -0
  7. pixeltable/catalog/table_version.py +40 -7
  8. pixeltable/catalog/view.py +2 -2
  9. pixeltable/config.py +1 -0
  10. pixeltable/env.py +2 -0
  11. pixeltable/exprs/column_ref.py +2 -1
  12. pixeltable/functions/__init__.py +1 -0
  13. pixeltable/functions/image.py +2 -8
  14. pixeltable/functions/reve.py +250 -0
  15. pixeltable/functions/video.py +534 -1
  16. pixeltable/globals.py +2 -1
  17. pixeltable/index/base.py +5 -18
  18. pixeltable/index/btree.py +6 -2
  19. pixeltable/index/embedding_index.py +4 -4
  20. pixeltable/metadata/schema.py +7 -32
  21. pixeltable/share/__init__.py +1 -1
  22. pixeltable/share/packager.py +22 -18
  23. pixeltable/share/protocol/__init__.py +34 -0
  24. pixeltable/share/protocol/common.py +170 -0
  25. pixeltable/share/protocol/operation_types.py +33 -0
  26. pixeltable/share/protocol/replica.py +109 -0
  27. pixeltable/share/publish.py +91 -56
  28. pixeltable/store.py +11 -15
  29. pixeltable/utils/av.py +87 -1
  30. pixeltable/utils/dbms.py +15 -11
  31. pixeltable/utils/image.py +10 -0
  32. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/METADATA +2 -1
  33. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/RECORD +36 -31
  34. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/WHEEL +0 -0
  35. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/entry_points.txt +0 -0
  36. {pixeltable-0.4.19.dist-info → pixeltable-0.4.21.dist-info}/licenses/LICENSE +0 -0
pixeltable/functions/video.py CHANGED
@@ -6,7 +6,7 @@ import glob
  import logging
  import pathlib
  import subprocess
- from typing import Any, Literal, NoReturn
+ from typing import TYPE_CHECKING, Any, Literal, NamedTuple, NoReturn

  import av
  import av.stream
@@ -19,6 +19,9 @@ from pixeltable.env import Env
  from pixeltable.utils.code import local_public_names
  from pixeltable.utils.local_store import TempStore

+ if TYPE_CHECKING:
+     from scenedetect.detectors import SceneDetector  # type: ignore[import-untyped]
+
  _logger = logging.getLogger('pixeltable')


@@ -936,6 +939,536 @@ def _create_drawtext_params(
      return drawtext_params


+ @pxt.udf(is_method=True)
+ def scene_detect_adaptive(
+     video: pxt.Video,
+     *,
+     fps: float | None = None,
+     adaptive_threshold: float = 3.0,
+     min_scene_len: int = 15,
+     window_width: int = 2,
+     min_content_val: float = 15.0,
+     delta_hue: float = 1.0,
+     delta_sat: float = 1.0,
+     delta_lum: float = 1.0,
+     delta_edges: float = 0.0,
+     luma_only: bool = False,
+     kernel_size: int | None = None,
+ ) -> list[dict]:
+     """
+     Detect scene cuts in a video using PySceneDetect's
+     [AdaptiveDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.adaptive_detector.AdaptiveDetector).
+
+     __Requirements:__
+
+     - `pip install scenedetect`
+
+     Args:
+         video: The video to analyze for scene cuts.
+         fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+             Lower values process faster but may miss exact scene cuts.
+         adaptive_threshold: Threshold that the score ratio must exceed to trigger a new scene cut.
+             Lower values detect more scenes (more sensitive), higher values detect fewer scenes.
+         min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to
+             the scene list.
+         window_width: Size of the window (number of frames) before and after each frame that is averaged
+             together in order to detect deviations from the mean. Must be at least 1.
+         min_content_val: Minimum threshold (float) that the content value must exceed in order to register as
+             a new scene. The content value is calculated the same way that `scene_detect_content()` calculates
+             the frame score, based on the weights/`luma_only`/`kernel_size` arguments.
+         delta_hue: Weight for hue component changes. Higher values make hue changes more important.
+         delta_sat: Weight for saturation component changes. Higher values make saturation changes more important.
+         delta_lum: Weight for luminance component changes. Higher values make brightness changes more important.
+         delta_edges: Weight for edge detection changes. Higher values make edge changes more important.
+             Edge detection can help detect cuts in scenes with similar colors but different content.
+         luma_only: If True, only analyzes changes in the luminance (brightness) channel of the video,
+             ignoring color information. This can be faster and may work better for grayscale content.
+         kernel_size: Size of the kernel used for post-edge-detection filtering. If None, automatically set
+             based on video resolution.
+
+     Returns:
+         A list of dictionaries, one for each detected scene, with the following keys:
+
+         - `start_time` (float): The start time of the scene in seconds.
+         - `start_pts` (int): The pts of the start of the scene.
+         - `duration` (float): The duration of the scene in seconds.
+
+         The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+     Examples:
+         Detect scene cuts with default parameters:
+
+         >>> tbl.select(tbl.video.scene_detect_adaptive()).collect()
+
+         Detect more scenes by lowering the threshold:
+
+         >>> tbl.select(tbl.video.scene_detect_adaptive(adaptive_threshold=1.5)).collect()
+
+         Use luminance-only detection with a longer minimum scene length:
+
+         >>> tbl.select(
+         ...     tbl.video.scene_detect_adaptive(
+         ...         luma_only=True,
+         ...         min_scene_len=30
+         ...     )
+         ... ).collect()
+
+         Add scene cuts as a computed column:
+
+         >>> tbl.add_computed_column(
+         ...     scene_cuts=tbl.video.scene_detect_adaptive(adaptive_threshold=2.0)
+         ... )
+
+         Analyze at a lower frame rate for faster processing:
+
+         >>> tbl.select(tbl.video.scene_detect_adaptive(fps=2.0)).collect()
+     """
+     Env.get().require_package('scenedetect')
+     from scenedetect.detectors import AdaptiveDetector, ContentDetector
+
+     weights = ContentDetector.Components(
+         delta_hue=delta_hue, delta_sat=delta_sat, delta_lum=delta_lum, delta_edges=delta_edges
+     )
+     try:
+         detector = AdaptiveDetector(
+             adaptive_threshold=adaptive_threshold,
+             min_scene_len=min_scene_len,
+             window_width=window_width,
+             min_content_val=min_content_val,
+             weights=weights,
+             luma_only=luma_only,
+             kernel_size=kernel_size,
+         )
+         return _scene_detect(video, fps, detector)
+     except Exception as e:
+         raise pxt.Error(f'scene_detect_adaptive(): failed to detect scenes: {e}') from e
+
+
+ @pxt.udf(is_method=True)
+ def scene_detect_content(
+     video: pxt.Video,
+     *,
+     fps: float | None = None,
+     threshold: float = 27.0,
+     min_scene_len: int = 15,
+     delta_hue: float = 1.0,
+     delta_sat: float = 1.0,
+     delta_lum: float = 1.0,
+     delta_edges: float = 0.0,
+     luma_only: bool = False,
+     kernel_size: int | None = None,
+     filter_mode: Literal['merge', 'suppress'] = 'merge',
+ ) -> list[dict]:
+     """
+     Detect scene cuts in a video using PySceneDetect's
+     [ContentDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.content_detector.ContentDetector).
+
+     __Requirements:__
+
+     - `pip install scenedetect`
+
+     Args:
+         video: The video to analyze for scene cuts.
+         fps: Number of frames to extract per second for analysis. If None, analyzes all frames.
+             Lower values process faster but may miss exact scene cuts.
+         threshold: Threshold that the weighted sum of component changes must exceed to trigger a scene cut.
+             Lower values detect more scenes (more sensitive), higher values detect fewer scenes.
+         min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to
+             the scene list.
+         delta_hue: Weight for hue component changes. Higher values make hue changes more important.
+         delta_sat: Weight for saturation component changes. Higher values make saturation changes more important.
+         delta_lum: Weight for luminance component changes. Higher values make brightness changes more important.
+         delta_edges: Weight for edge detection changes. Higher values make edge changes more important.
+             Edge detection can help detect cuts in scenes with similar colors but different content.
+         luma_only: If True, only analyzes changes in the luminance (brightness) channel,
+             ignoring color information. This can be faster and may work better for grayscale content.
+         kernel_size: Size of the kernel used for expanding detected edges. Must be an odd integer greater than
+             or equal to 3. If None, automatically set based on video resolution.
+         filter_mode: How to handle fast cuts/flashes: 'merge' combines quick cuts, 'suppress' filters them out.
+
+     Returns:
+         A list of dictionaries, one for each detected scene, with the following keys:
+
+         - `start_time` (float): The start time of the scene in seconds.
+         - `start_pts` (int): The pts of the start of the scene.
+         - `duration` (float): The duration of the scene in seconds.
+
+         The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+     Examples:
+         Detect scene cuts with default parameters:
+
+         >>> tbl.select(tbl.video.scene_detect_content()).collect()
+
+         Detect more scenes by lowering the threshold:
+
+         >>> tbl.select(tbl.video.scene_detect_content(threshold=15.0)).collect()
+
+         Use luminance-only detection:
+
+         >>> tbl.select(tbl.video.scene_detect_content(luma_only=True)).collect()
+
+         Emphasize edge detection for scenes with similar colors:
+
+         >>> tbl.select(
+         ...     tbl.video.scene_detect_content(
+         ...         delta_edges=1.0,
+         ...         delta_hue=0.5,
+         ...         delta_sat=0.5
+         ...     )
+         ... ).collect()
+
+         Add scene cuts as a computed column:
+
+         >>> tbl.add_computed_column(
+         ...     scene_cuts=tbl.video.scene_detect_content(threshold=20.0)
+         ... )
+     """
+     Env.get().require_package('scenedetect')
+     from scenedetect.detectors import ContentDetector
+     from scenedetect.detectors.content_detector import FlashFilter  # type: ignore[import-untyped]
+
+     weights = ContentDetector.Components(
+         delta_hue=delta_hue, delta_sat=delta_sat, delta_lum=delta_lum, delta_edges=delta_edges
+     )
+     filter_mode_enum = FlashFilter.Mode.MERGE if filter_mode == 'merge' else FlashFilter.Mode.SUPPRESS
+
+     try:
+         detector = ContentDetector(
+             threshold=threshold,
+             min_scene_len=min_scene_len,
+             weights=weights,
+             luma_only=luma_only,
+             kernel_size=kernel_size,
+             filter_mode=filter_mode_enum,
+         )
+         return _scene_detect(video, fps, detector)
+     except Exception as e:
+         raise pxt.Error(f'scene_detect_content(): failed to detect scenes: {e}') from e
+
+
+ @pxt.udf(is_method=True)
+ def scene_detect_threshold(
+     video: pxt.Video,
+     *,
+     fps: float | None = None,
+     threshold: float = 12.0,
+     min_scene_len: int = 15,
+     fade_bias: float = 0.0,
+     add_final_scene: bool = False,
+     method: Literal['ceiling', 'floor'] = 'floor',
+ ) -> list[dict]:
+     """
+     Detect fade-in and fade-out transitions in a video using PySceneDetect's
+     [ThresholdDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.threshold_detector.ThresholdDetector).
+
+     ThresholdDetector identifies scenes by detecting when pixel brightness falls below or rises above
+     a threshold value, making it suitable for detecting fade-to-black, fade-to-white, and similar transitions.
+
+     __Requirements:__
+
+     - `pip install scenedetect`
+
+     Args:
+         video: The video to analyze for fade transitions.
+         fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+             Lower values process faster but may miss exact transition points.
+         threshold: 8-bit intensity value that each pixel value (R, G, and B) must be <= in order to trigger
+             a fade in/out.
+         min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to
+             the scene list.
+         fade_bias: Float between -1.0 and +1.0 representing the percentage of timecode skew for the start of
+             a scene (-1.0 causes a cut at the fade-to-black, 0.0 in the middle, and +1.0 causes the cut to be
+             right at the position where the threshold is passed).
+         add_final_scene: If True and the video ends on a fade-out, generates an additional scene at that
+             timecode.
+         method: How to treat the threshold when detecting fade events:
+             - 'ceiling': Fade out happens when frame brightness rises above the threshold.
+             - 'floor': Fade out happens when frame brightness falls below the threshold.
+
+     Returns:
+         A list of dictionaries, one for each detected scene, with the following keys:
+
+         - `start_time` (float): The start time of the scene in seconds.
+         - `start_pts` (int): The pts of the start of the scene.
+         - `duration` (float): The duration of the scene in seconds.
+
+         The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+     Examples:
+         Detect fade-to-black transitions with default parameters:
+
+         >>> tbl.select(tbl.video.scene_detect_threshold()).collect()
+
+         Use a lower threshold to detect darker fades:
+
+         >>> tbl.select(tbl.video.scene_detect_threshold(threshold=8.0)).collect()
+
+         Detect fade-to-white transitions with the ceiling method:
+
+         >>> tbl.select(tbl.video.scene_detect_threshold(method='ceiling')).collect()
+
+         Add a final scene boundary if the video ends on a fade-out:
+
+         >>> tbl.select(
+         ...     tbl.video.scene_detect_threshold(
+         ...         add_final_scene=True
+         ...     )
+         ... ).collect()
+
+         Add fade transitions as a computed column:
+
+         >>> tbl.add_computed_column(
+         ...     fade_cuts=tbl.video.scene_detect_threshold(threshold=15.0)
+         ... )
+     """
+     Env.get().require_package('scenedetect')
+     from scenedetect.detectors import ThresholdDetector
+
+     method_enum = ThresholdDetector.Method.FLOOR if method == 'floor' else ThresholdDetector.Method.CEILING
+     try:
+         detector = ThresholdDetector(
+             threshold=threshold,
+             min_scene_len=min_scene_len,
+             fade_bias=fade_bias,
+             add_final_scene=add_final_scene,
+             method=method_enum,
+         )
+         return _scene_detect(video, fps, detector)
+     except Exception as e:
+         raise pxt.Error(f'scene_detect_threshold(): failed to detect scenes: {e}') from e
+
+
+ @pxt.udf(is_method=True)
+ def scene_detect_histogram(
+     video: pxt.Video, *, fps: float | None = None, threshold: float = 0.05, bins: int = 256, min_scene_len: int = 15
+ ) -> list[dict]:
+     """
+     Detect scene cuts in a video using PySceneDetect's
+     [HistogramDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.histogram_detector.HistogramDetector).
+
+     HistogramDetector compares frame histograms on the Y (luminance) channel after YUV conversion.
+     It detects scenes based on relative histogram differences and is more robust to gradual lighting
+     changes than content-based detection.
+
+     __Requirements:__
+
+     - `pip install scenedetect`
+
+     Args:
+         video: The video to analyze for scene cuts.
+         fps: Number of frames to extract per second for analysis. If None or 0, analyzes all frames.
+             Lower values process faster but may miss exact scene cuts.
+         threshold: Maximum relative difference between 0.0 and 1.0 by which the histograms can differ.
+             Histograms are calculated on the Y channel after converting the frame to YUV, and normalized
+             based on the number of bins. Higher differences imply greater change in content, so larger
+             threshold values are less sensitive to cuts. Lower values detect more scenes (more sensitive),
+             higher values detect fewer scenes.
+         bins: Number of bins to use for histogram calculation (typically 16-256). More bins provide
+             finer granularity but may be more sensitive to noise.
+         min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to
+             the scene list.
+
+     Returns:
+         A list of dictionaries, one for each detected scene, with the following keys:
+
+         - `start_time` (float): The start time of the scene in seconds.
+         - `start_pts` (int): The pts of the start of the scene.
+         - `duration` (float): The duration of the scene in seconds.
+
+         The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+     Examples:
+         Detect scene cuts with default parameters:
+
+         >>> tbl.select(tbl.video.scene_detect_histogram()).collect()
+
+         Detect more scenes by lowering the threshold:
+
+         >>> tbl.select(tbl.video.scene_detect_histogram(threshold=0.03)).collect()
+
+         Use fewer bins for faster processing:
+
+         >>> tbl.select(tbl.video.scene_detect_histogram(bins=64)).collect()
+
+         Use a longer minimum scene length:
+
+         >>> tbl.select(
+         ...     tbl.video.scene_detect_histogram(
+         ...         min_scene_len=30
+         ...     )
+         ... ).collect()
+
+         Add scene cuts as a computed column:
+
+         >>> tbl.add_computed_column(
+         ...     scene_cuts=tbl.video.scene_detect_histogram(threshold=0.04)
+         ... )
+     """
+     Env.get().require_package('scenedetect')
+     from scenedetect.detectors import HistogramDetector
+
+     try:
+         detector = HistogramDetector(threshold=threshold, bins=bins, min_scene_len=min_scene_len)
+         return _scene_detect(video, fps, detector)
+     except Exception as e:
+         raise pxt.Error(f'scene_detect_histogram(): failed to detect scenes: {e}') from e
+
+
+ @pxt.udf(is_method=True)
+ def scene_detect_hash(
+     video: pxt.Video,
+     *,
+     fps: float | None = None,
+     threshold: float = 0.395,
+     size: int = 16,
+     lowpass: int = 2,
+     min_scene_len: int = 15,
+ ) -> list[dict]:
+     """
+     Detect scene cuts in a video using PySceneDetect's
+     [HashDetector](https://www.scenedetect.com/docs/latest/api/detectors.html#scenedetect.detectors.hash_detector.HashDetector).
+
+     HashDetector uses perceptual hashing for very fast scene detection. It computes a hash of each
+     frame at reduced resolution and compares hash distances.
+
+     __Requirements:__
+
+     - `pip install scenedetect`
+
+     Args:
+         video: The video to analyze for scene cuts.
+         fps: Number of frames to extract per second for analysis. If None, analyzes all frames.
+             Lower values process faster but may miss exact scene cuts.
+         threshold: Value between 0.0 and 1.0 representing the relative Hamming distance between the perceptual
+             hashes of adjacent frames. A distance of 0 means the images are the same, and 1 means there is no
+             correlation. Smaller threshold values thus require more correlation, making the detector more
+             sensitive. The Hamming distance is divided by size x size for normalization before it is compared
+             to the threshold. Lower values detect more scenes (more sensitive), higher values detect fewer
+             scenes.
+         size: Size of the square of low-frequency data to use for the DCT. Larger values are more precise but
+             slower. Common values are 8, 16, or 32.
+         lowpass: How much high-frequency information to filter from the DCT. A value of 2 means keep the lower
+             1/2 of the frequency data, 4 means keep only 1/4, etc. Larger values make the detector less
+             sensitive to high-frequency details and noise.
+         min_scene_len: Once a cut is detected, this many frames must pass before a new one can be added to
+             the scene list.
+
+     Returns:
+         A list of dictionaries, one for each detected scene, with the following keys:
+
+         - `start_time` (float): The start time of the scene in seconds.
+         - `start_pts` (int): The pts of the start of the scene.
+         - `duration` (float): The duration of the scene in seconds.
+
+         The list is ordered chronologically. Returns the full duration of the video if no scenes are detected.
+
+     Examples:
+         Detect scene cuts with default parameters:
+
+         >>> tbl.select(tbl.video.scene_detect_hash()).collect()
+
+         Detect more scenes by lowering the threshold:
+
+         >>> tbl.select(tbl.video.scene_detect_hash(threshold=0.3)).collect()
+
+         Use a larger hash size for more precision:
+
+         >>> tbl.select(tbl.video.scene_detect_hash(size=32)).collect()
+
+         Combine a lower frame rate with a higher threshold for fast processing:
+
+         >>> tbl.select(
+         ...     tbl.video.scene_detect_hash(
+         ...         fps=1.0,
+         ...         threshold=0.4
+         ...     )
+         ... ).collect()
+
+         Add scene cuts as a computed column:
+
+         >>> tbl.add_computed_column(
+         ...     scene_cuts=tbl.video.scene_detect_hash()
+         ... )
+     """
+     Env.get().require_package('scenedetect')
+     from scenedetect.detectors import HashDetector
+
+     try:
+         detector = HashDetector(threshold=threshold, size=size, lowpass=lowpass, min_scene_len=min_scene_len)
+         return _scene_detect(video, fps, detector)
+     except Exception as e:
+         raise pxt.Error(f'scene_detect_hash(): failed to detect scenes: {e}') from e
+
+
+ class _SceneDetectFrameInfo(NamedTuple):
+     frame_idx: int
+     frame_pts: int
+     frame_time: float
+
+
+ def _scene_detect(video: str, fps: float | None, detector: 'SceneDetector') -> list[dict[str, int | float]]:
+     from scenedetect import FrameTimecode  # type: ignore[import-untyped]
+
+     with av_utils.VideoFrames(pathlib.Path(video), fps=fps) as frame_iter:
+         video_fps = float(frame_iter.video_framerate)
+
+         scenes: list[dict[str, int | float]] = []
+         frame_idx: int | None = None
+         start_time: float | None = None  # of current scene
+         start_pts: int | None = None  # of current scene
+
+         # in order to determine the cut frame times, we need to record frame times (chronologically) and look
+         # them up by index; trying to derive frame times from frame indices isn't possible due to variable
+         # frame rates
+         frame_info: list[_SceneDetectFrameInfo] = []
+
+         def process_cuts(cuts: list[FrameTimecode]) -> None:
+             nonlocal frame_info, start_time, start_pts
+             for cut_timecode in cuts:
+                 cut_frame_idx = cut_timecode.get_frames()
+                 # we expect cuts to come back in chronological order
+                 assert cut_frame_idx >= frame_info[0].frame_idx
+                 info_offset = next((i for i, info in enumerate(frame_info) if info.frame_idx == cut_frame_idx), None)
+                 assert info_offset is not None  # the cut is at a previously reported frame idx
+                 info = frame_info[info_offset]
+                 scenes.append(
+                     {'start_time': start_time, 'start_pts': start_pts, 'duration': info.frame_time - start_time}
+                 )
+                 start_time = info.frame_time
+                 start_pts = info.frame_pts
+                 frame_info = frame_info[info_offset + 1 :]
+
+         for item in frame_iter:
+             if start_time is None:
+                 start_time = item.time
+                 start_pts = item.pts
+             frame_info.append(_SceneDetectFrameInfo(item.frame_idx, item.pts, item.time))
+             frame_array = np.array(item.frame.convert('RGB'))
+             frame_idx = item.frame_idx
+             timecode = FrameTimecode(item.frame_idx, video_fps)
+             cuts = detector.process_frame(timecode, frame_array)
+             process_cuts(cuts)
+
+         # Post-process to capture any final scene cuts
+         if frame_idx is not None:
+             final_timecode = FrameTimecode(frame_idx, video_fps)
+             final_cuts = detector.post_process(final_timecode)
+             process_cuts(final_cuts)
+
+         # if we didn't detect any cuts but the video has content, add the full video as a single scene
+         if len(scenes) == 0:
+             scenes.append(
+                 {
+                     'start_time': start_time,
+                     'start_pts': start_pts,
+                     'duration': frame_info[-1].frame_time - start_time,
+                 }
+             )
+
+         return scenes
+
+
  __all__ = local_public_names(__name__)


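The new scene-detection UDFs are registered as methods on video columns. A minimal usage sketch, assuming a hypothetical table `films` with a video column named `video` (the table and column names are illustrative, not part of this diff):

    import pixeltable as pxt

    films = pxt.get_table('films')

    # ad-hoc query: detect cuts with the content detector (requires `pip install scenedetect`)
    scenes = films.select(films.video.scene_detect_content(threshold=20.0)).collect()

    # persist the results as a computed column; each row holds a chronologically ordered list of
    # {'start_time', 'start_pts', 'duration'} dicts, one per detected scene
    films.add_computed_column(scene_cuts=films.video.scene_detect_adaptive(adaptive_threshold=2.0))
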
pixeltable/globals.py CHANGED
@@ -456,7 +456,8 @@ def replicate(remote_uri: str, local_path: str) -> catalog.Table:
      queried offline just as any other Pixeltable table.

      Args:
-         remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'`.
+         remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'` or
+             `'pxt://org_name/my_dir/my_table:5'` (to replicate version 5 of the table).
          local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
              the same or different from the cloud table name.
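A short usage sketch of the extended URI syntax (the org, directory, and table names are placeholders):

    import pixeltable as pxt

    # replicate the latest version of a published table
    tbl = pxt.replicate('pxt://org_name/my_dir/my_table', 'my_new_dir.my_new_tbl')

    # pin the replica to version 5 of the remote table
    tbl_v5 = pxt.replicate('pxt://org_name/my_dir/my_table:5', 'my_new_dir.my_tbl_v5')
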
pixeltable/index/base.py CHANGED
@@ -1,7 +1,6 @@
  from __future__ import annotations

  import abc
- from typing import Any

  import sqlalchemy as sql

@@ -19,47 +18,35 @@ class IndexBase(abc.ABC):
      the specific subclass.
      """

-     @abc.abstractmethod
-     def __init__(self, **kwargs: Any):
-         pass
-
      @abc.abstractmethod
      def create_value_expr(self, c: catalog.Column) -> exprs.Expr:
          """
          Validates that the index can be created on column c and returns an expression that computes the index value.
          """
-         pass

      @abc.abstractmethod
      def records_value_errors(self) -> bool:
          """True if index_value_expr() can raise errors"""
-         pass

      @abc.abstractmethod
      def get_index_sa_type(self, value_col_type: ts.ColumnType) -> sql.types.TypeEngine:
          """Return the sqlalchemy type of the index value column"""
-         pass

      @abc.abstractmethod
-     def sa_index(self, index_name: str, index_value_col: catalog.Column) -> sql.Index:
-         """Return a sqlalchemy Index instance"""
-         pass
+     def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
+         """Return a sqlalchemy statement for creating the index"""

      @abc.abstractmethod
      def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
          """Drop the index on the index value column"""
-         pass

      @classmethod
      @abc.abstractmethod
-     def display_name(cls) -> str:
-         pass
+     def display_name(cls) -> str: ...

      @abc.abstractmethod
-     def as_dict(self) -> dict:
-         pass
+     def as_dict(self) -> dict: ...

      @classmethod
      @abc.abstractmethod
-     def from_dict(cls, d: dict) -> IndexBase:
-         pass
+     def from_dict(cls, d: dict) -> IndexBase: ...
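The interface change replaces sa_index(), which returned a live sql.Index, with sa_create_stmt(), which returns pre-compiled DDL. A hedged sketch of how a caller might execute the compiled statement; pixeltable's actual call site is in store.py, which this diff modifies but does not show, so this is an assumption rather than the library's code:

    import sqlalchemy as sql

    def create_store_index(engine: sql.engine.Engine, index: 'IndexBase',
                           store_index_name: str, sa_value_col: sql.Column) -> None:
        # the index object renders dialect-specific CREATE INDEX DDL up front
        stmt = index.sa_create_stmt(store_index_name, sa_value_col)
        with engine.begin() as conn:
            # a sql.Compiled object renders to a SQL string; run it as literal DDL
            conn.execute(sql.text(str(stmt)))
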
pixeltable/index/btree.py CHANGED
@@ -53,8 +53,12 @@ class BtreeIndex(IndexBase):
          """Return the sqlalchemy type of the index value column"""
          return val_col_type.to_sa_type()

-     def sa_index(self, store_index_name: str, index_value_col: 'catalog.Column') -> sql.Index:
-         return sql.Index(store_index_name, index_value_col.sa_col, postgresql_using='btree')
+     def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
+         """Return a sqlalchemy statement for creating the index"""
+         from sqlalchemy.dialects import postgresql
+
+         sa_idx = sql.Index(store_index_name, sa_value_col, postgresql_using='btree')
+         return sql.schema.CreateIndex(sa_idx, if_not_exists=True).compile(dialect=postgresql.dialect())

      def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
          """Drop the index on the index value column"""
pixeltable/index/embedding_index.py CHANGED
@@ -131,10 +131,10 @@ class EmbeddingIndex(IndexBase):
          assert vector_size is not None
          return pgvector.sqlalchemy.Vector(vector_size)

-     def sa_index(self, store_index_name: str, index_value_col: 'catalog.Column') -> sql.Index:
-         """Create the index on the index value column"""
-         return Env.get().dbms.sa_vector_index(
-             store_index_name, index_value_col.sa_col, metric=self.PGVECTOR_OPS[self.metric]
+     def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
+         """Return a sqlalchemy statement for creating the index"""
+         return Env.get().dbms.create_vector_index_stmt(
+             store_index_name, sa_value_col, metric=self.PGVECTOR_OPS[self.metric]
          )

      def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
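For orientation: a plausible shape of the pgvector DDL that Env.get().dbms.create_vector_index_stmt() compiles. The actual SQL lives in pixeltable/utils/dbms.py (changed in this diff but not shown here), so the index type and operator class below are illustrative assumptions:

    import sqlalchemy as sql
    from sqlalchemy.dialects import postgresql

    def example_vector_index_ddl(store_index_name: str, sa_value_col: sql.Column) -> str:
        # pgvector exposes per-metric operator classes, e.g. vector_cosine_ops for cosine distance;
        # 'hnsw' is one of the index types pgvector supports
        idx = sql.Index(
            store_index_name,
            sa_value_col,
            postgresql_using='hnsw',
            postgresql_ops={sa_value_col.name: 'vector_cosine_ops'},
        )
        return str(sql.schema.CreateIndex(idx, if_not_exists=True).compile(dialect=postgresql.dialect()))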