pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,15 +1,23 @@
1
+ import glob
1
2
  import logging
2
3
  import math
4
+ import subprocess
3
5
  from fractions import Fraction
4
6
  from pathlib import Path
5
- from typing import Any, Optional, Sequence
7
+ from typing import Any, Iterator, Literal
6
8
 
7
- import av # type: ignore[import-untyped]
9
+ import av
8
10
  import pandas as pd
9
11
  import PIL.Image
12
+ from av.container import InputContainer
13
+ from deprecated import deprecated
10
14
 
15
+ import pixeltable as pxt
11
16
  import pixeltable.exceptions as excs
12
17
  import pixeltable.type_system as ts
18
+ import pixeltable.utils.av as av_utils
19
+ from pixeltable.env import Env
20
+ from pixeltable.utils.local_store import TempStore
13
21
 
14
22
  from .base import ComponentIterator
15
23
 
@@ -18,42 +26,78 @@ _logger = logging.getLogger('pixeltable')
18
26
 
19
27
  class FrameIterator(ComponentIterator):
20
28
  """
21
- Iterator over frames of a video. At most one of `fps` or `num_frames` may be specified. If `fps` is specified,
22
- then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified, then the
23
- exact number of frames will be extracted. If neither is specified, then all frames will be extracted. The first
24
- frame of the video will always be extracted, and the remaining frames will be spaced as evenly as possible.
29
+ Iterator over frames of a video. At most one of `fps`, `num_frames`, or `keyframes_only` may be specified. If `fps`
30
+ is specified, then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified,
31
+ then the exact number of frames will be extracted. If neither is specified, then all frames will be extracted.
32
+
33
+ If `fps` or `num_frames` is large enough to exceed the native framerate of the video, then all frames will be
34
+ extracted. (Frames will never be duplicated; the maximum number of frames extracted is the total number of frames
35
+ in the video.)
25
36
 
26
37
  Args:
27
- video: URL or path of the video to use for frame extraction.
28
- fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
29
- If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
30
- extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
31
- num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
32
- `num_frames` is greater than the number of frames in the video, all frames will be extracted.
38
+ fps: Number of frames to extract per second of video. This may be a fractional value, such as `0.5` (one frame
39
+ per two seconds). The first frame of the video will always be extracted.
40
+ num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible: the video will
41
+ be divided into `num_frames` evenly spaced intervals, and the midpoint of each interval will be used for
42
+ frame extraction.
43
+ keyframes_only: If True, only extract keyframes.
44
+ all_frame_attrs:
45
+ If True, outputs a `pxt.Json` column `frame_attrs` with the following `pyav`-provided attributes
46
+ (for more information, see `pyav`'s documentation on
47
+ [VideoFrame](https://pyav.org/docs/develop/api/video.html#module-av.video.frame) and
48
+ [Frame](https://pyav.org/docs/develop/api/frame.html)):
49
+
50
+ * `index` (`int`)
51
+ * `pts` (`int | None`)
52
+ * `dts` (`int | None`)
53
+ * `time` (`float | None`)
54
+ * `is_corrupt` (`bool`)
55
+ * `key_frame` (`bool`)
56
+ * `pict_type` (`int`)
57
+ * `interlaced_frame` (`bool`)
58
+
59
+ If False, only outputs frame attributes `frame_idx`, `pos_msec`, and `pos_frame` as separate columns.
33
60
  """
34
61
 
35
62
  # Input parameters
36
63
  video_path: Path
37
- fps: Optional[float]
38
- num_frames: Optional[int]
64
+ fps: float | None
65
+ num_frames: int | None
66
+ keyframes_only: bool
67
+ all_frame_attrs: bool
39
68
 
40
69
  # Video info
41
- container: av.container.input.InputContainer
42
- video_framerate: Fraction
70
+ container: InputContainer
43
71
  video_time_base: Fraction
44
- video_frame_count: int
45
- video_start_time: int
72
+ video_start_time: float
73
+ video_duration: float | None
74
+
75
+ # extraction info
76
+ extraction_step: float | None
77
+ next_extraction_time: float | None
46
78
 
47
- # List of frame indices to be extracted, or None to extract all frames
48
- frames_to_extract: Optional[list[int]]
79
+ # state
80
+ pos: int
81
+ video_idx: int
82
+ cur_frame: av.VideoFrame | None
49
83
 
50
- # Next frame to extract, as an iterator `pos` index. If `frames_to_extract` is None, this is the same as the
51
- # frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
52
- next_pos: int
84
+ def __init__(
85
+ self,
86
+ video: str,
87
+ *,
88
+ fps: float | None = None,
89
+ num_frames: int | None = None,
90
+ keyframes_only: bool = False,
91
+ all_frame_attrs: bool = False,
92
+ ):
93
+ if int(fps is not None) + int(num_frames is not None) + int(keyframes_only) > 1:
94
+ raise excs.Error('At most one of `fps`, `num_frames` or `keyframes_only` may be specified')
53
95
 
54
- def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
55
- if fps is not None and num_frames is not None:
56
- raise excs.Error('At most one of `fps` or `num_frames` may be specified')
96
+ if fps is not None and fps < 0.0:
97
+ raise excs.Error('`fps` must be a non-negative number')
98
+
99
+ if fps == 0.0:
100
+ fps = None # treat 0.0 as unspecified
57
101
 
58
102
  video_path = Path(video)
59
103
  assert video_path.exists() and video_path.is_file()
@@ -61,55 +105,57 @@ class FrameIterator(ComponentIterator):
61
105
  self.container = av.open(str(video_path))
62
106
  self.fps = fps
63
107
  self.num_frames = num_frames
108
+ self.keyframes_only = keyframes_only
109
+ self.all_frame_attrs = all_frame_attrs
64
110
 
65
- self.video_framerate = self.container.streams.video[0].average_rate
66
111
  self.video_time_base = self.container.streams.video[0].time_base
67
- self.video_start_time = self.container.streams.video[0].start_time or 0
68
-
69
- # Determine the number of frames in the video
70
- self.video_frame_count = self.container.streams.video[0].frames
71
- if self.video_frame_count == 0:
72
- # The video codec does not provide a frame count in the standard `frames` field. Try some other methods.
73
- metadata: dict = self.container.streams.video[0].metadata
74
- if 'NUMBER_OF_FRAMES' in metadata:
75
- self.video_frame_count = int(metadata['NUMBER_OF_FRAMES'])
76
- elif 'DURATION' in metadata:
77
- # As a last resort, calculate the frame count from the stream duration.
78
- duration = metadata['DURATION']
79
- assert isinstance(duration, str)
80
- seconds = pd.to_timedelta(duration).total_seconds()
81
- # Usually the duration and framerate are precise enough for this calculation to be accurate, but if
82
- # we encounter a case where it's off by one due to a rounding error, that's ok; we only use this
83
- # to determine the positions of the sampled frames when `fps` or `num_frames` is specified.
84
- self.video_frame_count = round(seconds * self.video_framerate)
85
- else:
86
- raise excs.Error(f'Video {video}: failed to get number of frames')
87
112
 
88
- if num_frames is not None:
89
- # specific number of frames
90
- if num_frames > self.video_frame_count:
91
- # Extract all frames
92
- self.frames_to_extract = None
93
- else:
94
- spacing = float(self.video_frame_count) / float(num_frames)
95
- self.frames_to_extract = list(round(i * spacing) for i in range(num_frames))
96
- assert len(self.frames_to_extract) == num_frames
113
+ start_time = self.container.streams.video[0].start_time or 0
114
+ self.video_start_time = float(start_time * self.video_time_base)
115
+
116
+ duration_pts: int | None = self.container.streams.video[0].duration
117
+ if duration_pts is not None:
118
+ self.video_duration = float(duration_pts * self.video_time_base)
97
119
  else:
98
- if fps is None or fps == 0.0:
99
- # Extract all frames
100
- self.frames_to_extract = None
101
- elif fps > float(self.video_framerate):
102
- raise excs.Error(
103
- f'Video {video}: requested fps ({fps}) exceeds that of the video ({float(self.video_framerate)})'
104
- )
120
+ # As a backup, try to calculate duration from DURATION metadata field
121
+ metadata = self.container.streams.video[0].metadata
122
+ duration_field = metadata.get('DURATION') # A string like "00:01:23"
123
+ if duration_field is not None:
124
+ assert isinstance(duration_field, str)
125
+ self.video_duration = pd.to_timedelta(duration_field).total_seconds()
105
126
  else:
106
- # Extract frames at the implied frequency
107
- freq = fps / float(self.video_framerate)
108
- n = math.ceil(self.video_frame_count * freq) # number of frames to extract
109
- self.frames_to_extract = list(round(i / freq) for i in range(n))
127
+ # TODO: Anything we can do here? Other methods of determining the duration are expensive and
128
+ # not so appropriate for an iterator initializer.
129
+ self.video_duration = None
110
130
 
111
- _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames}')
112
- self.next_pos = 0
131
+ if self.video_duration is None and self.num_frames is not None:
132
+ raise excs.Error(f'Could not determine duration of video: {video}')
133
+
134
+ # If self.fps or self.num_frames is specified, we cannot rely on knowing in advance which frame positions will
135
+ # be needed, since for variable framerate videos we do not know in advance the precise timestamp of each frame.
136
+ # The strategy is: predetermine a list of "extraction times", the idealized timestamps of the frames we want to
137
+ # materialize. As we later iterate through the frames, we will choose the frames that are closest to these
138
+ # idealized timestamps.
139
+
140
+ self.pos = 0
141
+ self.video_idx = 0
142
+ if self.num_frames is not None:
143
+ # Divide the video duration into num_frames evenly spaced intervals. The extraction times are the midpoints
144
+ # of those intervals.
145
+ self.extraction_step = (self.video_duration - self.video_start_time) / self.num_frames
146
+ self.next_extraction_time = self.video_start_time + self.extraction_step / 2
147
+ elif self.fps is not None:
148
+ self.extraction_step = 1 / self.fps
149
+ self.next_extraction_time = self.video_start_time
150
+ else:
151
+ self.extraction_step = None
152
+ self.next_extraction_time = None
153
+
154
+ _logger.debug(
155
+ f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames} '
156
+ f'keyframes_only={self.keyframes_only}'
157
+ )
158
+ self.cur_frame = self.next_frame()
113
159
 
114
160
  @classmethod
115
161
  def input_schema(cls) -> dict[str, ts.ColumnType]:
@@ -117,77 +163,427 @@ class FrameIterator(ComponentIterator):
117
163
  'video': ts.VideoType(nullable=False),
118
164
  'fps': ts.FloatType(nullable=True),
119
165
  'num_frames': ts.IntType(nullable=True),
166
+ 'keyframes_only': ts.BoolType(nullable=False),
167
+ 'all_frame_attrs': ts.BoolType(nullable=False),
120
168
  }
121
169
 
122
170
  @classmethod
123
171
  def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
124
- return {
125
- 'frame_idx': ts.IntType(),
126
- 'pos_msec': ts.FloatType(),
127
- 'pos_frame': ts.IntType(),
128
- 'frame': ts.ImageType(),
129
- }, ['frame']
172
+ attrs: dict[str, ts.ColumnType]
173
+ fps = kwargs.get('fps')
174
+ if fps is not None and (not isinstance(fps, (int, float)) or fps < 0.0):
175
+ raise excs.Error('`fps` must be a non-negative number')
130
176
 
131
- def __next__(self) -> dict[str, Any]:
132
- # Determine the frame index in the video corresponding to the iterator index `next_pos`;
133
- # the frame at this index is the one we want to extract next
134
- if self.frames_to_extract is None:
135
- next_video_idx = self.next_pos # we're extracting all frames
136
- elif self.next_pos >= len(self.frames_to_extract):
137
- raise StopIteration
177
+ if kwargs.get('all_frame_attrs'):
178
+ attrs = {'frame_attrs': ts.JsonType()}
138
179
  else:
139
- next_video_idx = self.frames_to_extract[self.next_pos]
140
-
141
- # We are searching for the frame at the index implied by `next_pos`. Step through the video until we
142
- # find it. There are two reasons why it might not be the immediate next frame in the video:
143
- # (1) `fps` or `num_frames` was specified as an iterator argument; or
144
- # (2) we just did a seek, and the desired frame is not a keyframe.
145
- # TODO: In case (1) it will usually be fastest to step through the frames until we find the one we're
146
- # looking for. But in some cases it may be faster to do a seek; for example, when `fps` is very
147
- # low and there are multiple keyframes in between each frame we want to extract (imagine extracting
148
- # 10 frames from an hourlong video).
180
+ attrs = {'frame_idx': ts.IntType(), 'pos_msec': ts.FloatType(), 'pos_frame': ts.IntType()}
181
+ return {**attrs, 'frame': ts.ImageType()}, ['frame']
182
+
183
+ def next_frame(self) -> av.VideoFrame | None:
184
+ try:
185
+ return next(self.container.decode(video=0))
186
+ except EOFError:
187
+ return None
188
+
189
+ def __next__(self) -> dict[str, Any]:
149
190
  while True:
150
- try:
151
- frame = next(self.container.decode(video=0))
152
- except EOFError:
191
+ if self.cur_frame is None:
153
192
  raise StopIteration
154
- # Compute the index of the current frame in the video based on the presentation timestamp (pts);
155
- # this ensures we have a canonical understanding of frame index, regardless of how we got here
156
- # (seek or iteration)
157
- pts = frame.pts - self.video_start_time
158
- video_idx = round(pts * self.video_time_base * self.video_framerate)
159
- assert isinstance(video_idx, int)
160
- if video_idx < next_video_idx:
161
- # We haven't reached the desired frame yet
193
+
194
+ next_frame = self.next_frame()
195
+
196
+ if self.keyframes_only and not self.cur_frame.key_frame:
197
+ self.cur_frame = next_frame
198
+ self.video_idx += 1
162
199
  continue
163
200
 
164
- # Sanity check that we're at the right frame.
165
- if video_idx != next_video_idx:
166
- raise excs.Error(f'Frame {next_video_idx} is missing from the video (video file is corrupt)')
167
- img = frame.to_image()
201
+ cur_frame_pts = self.cur_frame.pts
202
+ cur_frame_time = float(cur_frame_pts * self.video_time_base)
203
+
204
+ if self.extraction_step is not None:
205
+ # We are targeting a specified list of extraction times (because fps or num_frames was specified).
206
+ assert self.next_extraction_time is not None
207
+
208
+ if next_frame is None:
209
+ # cur_frame is the last frame of the video. If it is before the next extraction time, then we
210
+ # have reached the end of the video.
211
+ if cur_frame_time < self.next_extraction_time:
212
+ raise StopIteration
213
+ else:
214
+ # The extraction time represents the idealized timestamp of the next frame we want to extract.
215
+ # If next_frame is *closer* to it than cur_frame, then we skip cur_frame.
216
+ # The following logic handles all three cases:
217
+ # - next_extraction_time is before cur_frame_time (never skips)
218
+ # - next_extraction_time is after next_frame_time (always skips)
219
+ # - next_extraction_time is between cur_frame_time and next_frame_time (depends on which is closer)
220
+ next_frame_pts = next_frame.pts
221
+ next_frame_time = float(next_frame_pts * self.video_time_base)
222
+ if next_frame_time - self.next_extraction_time < self.next_extraction_time - cur_frame_time:
223
+ self.cur_frame = next_frame
224
+ self.video_idx += 1
225
+ continue
226
+
227
+ img = self.cur_frame.to_image()
168
228
  assert isinstance(img, PIL.Image.Image)
169
- pos_msec = float(pts * self.video_time_base * 1000)
170
- result = {
171
- 'frame_idx': self.next_pos,
172
- 'pos_msec': pos_msec,
173
- 'pos_frame': video_idx,
174
- 'frame': img,
175
- }
176
- self.next_pos += 1
229
+ result: dict[str, Any] = {'frame': img}
230
+ if self.all_frame_attrs:
231
+ attrs = {
232
+ 'index': self.video_idx,
233
+ 'pts': cur_frame_pts,
234
+ 'dts': self.cur_frame.dts,
235
+ 'time': float(cur_frame_pts * self.video_time_base),
236
+ 'is_corrupt': self.cur_frame.is_corrupt,
237
+ 'key_frame': self.cur_frame.key_frame,
238
+ 'pict_type': self.cur_frame.pict_type,
239
+ 'interlaced_frame': self.cur_frame.interlaced_frame,
240
+ }
241
+ result['frame_attrs'] = attrs
242
+ else:
243
+ pos_msec = float(cur_frame_pts * self.video_time_base * 1000 - self.video_start_time)
244
+ result.update({'frame_idx': self.pos, 'pos_msec': pos_msec, 'pos_frame': self.video_idx})
245
+
246
+ self.cur_frame = next_frame
247
+ self.video_idx += 1
248
+
249
+ self.pos += 1
250
+ if self.extraction_step is not None:
251
+ self.next_extraction_time += self.extraction_step
252
+
177
253
  return result
178
254
 
179
255
  def close(self) -> None:
180
256
  self.container.close()
181
257
 
182
- def set_pos(self, pos: int) -> None:
183
- if pos == self.next_pos:
184
- return # already there
185
-
186
- video_idx = pos if self.frames_to_extract is None else self.frames_to_extract[pos]
187
- _logger.debug(f'seeking to frame number {video_idx} (at iterator index {pos})')
188
- # compute the frame position in time_base units
189
- seek_pos = int(video_idx / self.video_framerate / self.video_time_base + self.video_start_time)
190
- # This will seek to the nearest keyframe before the desired frame. If the frame being sought is not a keyframe,
191
- # then the iterator will step forward to the desired frame on the subsequent call to next().
192
- self.container.seek(seek_pos, backward=True, stream=self.container.streams.video[0])
193
- self.next_pos = pos
258
+ def set_pos(self, pos: int, **kwargs: Any) -> None:
259
+ assert next(iter(kwargs.values()), None) is not None
260
+
261
+ if self.pos == pos:
262
+ # Nothing to do
263
+ return
264
+
265
+ self.pos = pos
266
+
267
+ seek_time: float
268
+ if 'pos_msec' in kwargs:
269
+ self.video_idx = kwargs['pos_frame']
270
+ seek_time = kwargs['pos_msec'] / 1000.0 + self.video_start_time
271
+ else:
272
+ assert 'frame_attrs' in kwargs
273
+ self.video_idx = kwargs['frame_attrs']['index']
274
+ seek_time = kwargs['frame_attrs']['time']
275
+
276
+ assert isinstance(self.video_idx, int)
277
+ assert isinstance(seek_time, float)
278
+
279
+ seek_pts = math.floor(seek_time / self.video_time_base)
280
+ self.container.seek(seek_pts, backward=True, stream=self.container.streams.video[0])
281
+
282
+ self.cur_frame = self.next_frame()
283
+ while self.cur_frame is not None and float(self.cur_frame.pts * self.video_time_base) < seek_time - 1e-3:
284
+ self.cur_frame = self.next_frame()
285
+ assert self.cur_frame is None or abs(float(self.cur_frame.pts * self.video_time_base) - seek_time) < 1e-3
286
+
287
+ @classmethod
288
+ @deprecated('create() is deprecated; use `pixeltable.functions.video.frame_iterator` instead', version='0.5.6')
289
+ def create(cls, **kwargs: Any) -> tuple[type[ComponentIterator], dict[str, Any]]:
290
+ return super()._create(**kwargs)
291
+
292
+
293
+ class VideoSplitter(ComponentIterator):
294
+ """
295
+ Iterator over segments of a video file, which is split into segments. The segments are specified either via a
296
+ fixed duration or a list of split points.
297
+
298
+ Args:
299
+ duration: Video segment duration in seconds
300
+ overlap: Overlap between consecutive segments in seconds. Only available for `mode='fast'`.
301
+ min_segment_duration: Drop the last segment if it is smaller than min_segment_duration.
302
+ segment_times: List of timestamps (in seconds) in video where segments should be split. Note that these are not
303
+ segment durations. If all segment times are less than the duration of the video, produces exactly
304
+ `len(segment_times) + 1` segments. An argument of `[]` will produce a single segment containing the
305
+ entire video.
306
+ mode: Segmentation mode:
307
+ - `'fast'`: Quick segmentation using stream copy (splits only at keyframes, approximate durations)
308
+ - `'accurate'`: Precise segmentation with re-encoding (exact durations, slower)
309
+ video_encoder: Video encoder to use. If not specified, uses the default encoder for the current platform.
310
+ Only available for `mode='accurate'`.
311
+ video_encoder_args: Additional arguments to pass to the video encoder. Only available for `mode='accurate'`.
312
+ """
313
+
314
+ # Input parameters
315
+ video_path: Path
316
+ segment_duration: float | None
317
+ segment_times: list[float] | None # [] is valid
318
+ overlap: float
319
+ min_segment_duration: float
320
+ video_encoder: str | None
321
+ video_encoder_args: dict[str, Any] | None
322
+
323
+ # Video metadata
324
+ video_time_base: Fraction
325
+
326
+ output_iter: Iterator[dict[str, Any]]
327
+
328
def __init__(
    self,
    video: str,
    *,
    duration: float | None = None,
    overlap: float | None = None,
    min_segment_duration: float | None = None,
    segment_times: list[float] | None = None,
    mode: Literal['fast', 'accurate'] = 'accurate',
    video_encoder: str | None = None,
    video_encoder_args: dict[str, Any] | None = None,
):
    """Validate the arguments, store them, and select the generator that produces the segments.

    Args:
        video: Path of the video file to split; must point to an existing regular file.
        duration: Segment duration in seconds. Exactly one of `duration` and `segment_times` must be given.
        overlap: Overlap between consecutive segments in seconds; stored as 0.0 when omitted.
        min_segment_duration: Minimum segment duration in seconds; stored as 0.0 when omitted.
        segment_times: Split points in seconds. `[]` yields the whole video as a single segment.
        mode: `'fast'` selects `fast_iter()`; any other value selects `accurate_iter()`.
        video_encoder: Encoder name handed to the accurate-mode ffmpeg command.
        video_encoder_args: Extra encoder arguments handed to the accurate-mode ffmpeg command.

    Raises:
        excs.Error: If `_check_args()` rejects the argument combination.
    """
    Env.get().require_binary('ffmpeg')
    self._check_args(
        duration, segment_times, overlap, min_segment_duration, mode, video_encoder, video_encoder_args
    )
    # The asserts restate invariants that _check_args() already enforces for plain numeric arguments.
    assert (duration is not None) != (segment_times is not None)
    if duration is not None:
        assert duration > 0.0
        # min_segment_duration is optional: comparing a float against None raises TypeError, so the
        # comparison is guarded the same way as the overlap check below.
        assert min_segment_duration is None or duration >= min_segment_duration
        assert overlap is None or overlap < duration

    video_path = Path(video)
    assert video_path.exists() and video_path.is_file()

    self.video_path = video_path
    self.segment_duration = duration
    self.overlap = overlap if overlap is not None else 0.0
    self.min_segment_duration = min_segment_duration if min_segment_duration is not None else 0.0
    self.segment_times = segment_times
    self.video_encoder = video_encoder
    self.video_encoder_args = video_encoder_args

    # The generators are lazy: none of them runs before the first __next__() call, so it is fine that
    # video_time_base is only assigned further down.
    if self.segment_times is not None and len(self.segment_times) == 0:
        self.output_iter = self.complete_video_iter()
    else:
        self.output_iter = self.fast_iter() if mode == 'fast' else self.accurate_iter()

    # The time base of the first video stream is used to convert durations in seconds into pts values.
    with av.open(str(video_path)) as container:
        self.video_time_base = container.streams.video[0].time_base
368
+
369
+ # TODO: check types of args
370
+
371
@classmethod
def input_schema(cls) -> dict[str, ts.ColumnType]:
    """Return the column type of every constructor argument, keyed by argument name.

    Only `video` and `mode` are non-nullable; every other argument may be null.
    """
    # (argument name, column type class, nullable)
    arg_specs = (
        ('video', ts.VideoType, False),
        ('duration', ts.FloatType, True),
        ('overlap', ts.FloatType, True),
        ('min_segment_duration', ts.FloatType, True),
        ('segment_times', ts.JsonType, True),
        ('mode', ts.StringType, False),
        ('video_encoder', ts.StringType, True),
        ('video_encoder_args', ts.JsonType, True),
    )
    return {arg_name: type_cls(nullable=is_nullable) for arg_name, type_cls, is_nullable in arg_specs}
383
+
384
@classmethod
def _check_args(
    cls,
    segment_duration: Any,
    segment_times: Any,
    overlap: Any,
    min_segment_duration: Any,
    mode: Any,
    video_encoder: Any,
    video_encoder_args: Any,
) -> None:
    """Check that the splitter arguments form a valid combination.

    The checks run in a fixed order and the first violation is reported. Range checks are applied only
    to arguments that are plain `int`/`float` values; arguments of any other type skip them.

    Raises:
        excs.Error: On the first rule that is violated.
    """
    duration_given = segment_duration is not None
    times_given = segment_times is not None
    overlap_given = overlap is not None

    # duration and segment_times are mutually exclusive, and one of them is required
    if not (duration_given or times_given):
        raise excs.Error('Must specify either duration or segment_times')
    if duration_given and times_given:
        raise excs.Error('duration and segment_times cannot both be specified')
    if times_given and overlap_given:
        raise excs.Error('overlap cannot be specified with segment_times')

    # isinstance() is False for None, so a missing value never reaches the range checks
    if isinstance(segment_duration, (int, float)):
        if segment_duration <= 0.0:
            raise excs.Error(f'duration must be a positive number: {segment_duration}')
        if isinstance(min_segment_duration, (int, float)) and segment_duration < min_segment_duration:
            raise excs.Error(
                f'duration must be at least min_segment_duration: {segment_duration} < {min_segment_duration}'
            )
        if isinstance(overlap, (int, float)) and overlap >= segment_duration:
            raise excs.Error(f'overlap must be less than duration: {overlap} >= {segment_duration}')

    # mode-specific restrictions
    if mode == 'accurate' and overlap_given:
        raise excs.Error("Cannot specify overlap for mode='accurate'")
    if mode == 'fast':
        if video_encoder is not None:
            raise excs.Error("Cannot specify video_encoder for mode='fast'")
        if video_encoder_args is not None:
            raise excs.Error("Cannot specify video_encoder_args for mode='fast'")
421
+
422
@classmethod
def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
    """Validate the supplied arguments and describe the columns emitted for each segment.

    Positional arguments are matched, in order, to `duration`, `overlap`, `min_segment_duration` and
    `segment_times`; a keyword argument with the same name overrides the positional value. `mode`
    defaults to `'accurate'` when it is not supplied.

    Returns:
        A pair of the output column types keyed by column name, and an empty list.

    Raises:
        excs.Error: If `_check_args()` rejects the argument combination.
    """
    positional_names = ('duration', 'overlap', 'min_segment_duration', 'segment_times')
    supplied = {**dict(zip(positional_names, args)), **kwargs}

    cls._check_args(
        supplied.get('duration'),
        supplied.get('segment_times'),
        supplied.get('overlap'),
        supplied.get('min_segment_duration'),
        supplied.get('mode', 'accurate'),
        supplied.get('video_encoder'),
        supplied.get('video_encoder_args'),
    )

    schema = {
        'segment_start': ts.FloatType(nullable=True),
        'segment_start_pts': ts.IntType(nullable=True),
        'segment_end': ts.FloatType(nullable=True),
        'segment_end_pts': ts.IntType(nullable=True),
        'video_segment': ts.VideoType(nullable=False),
    }
    return schema, []
446
+
447
def complete_video_iter(self) -> Iterator[dict[str, Any]]:
    """Returns the entire video as a single segment"""
    assert len(self.segment_times) == 0

    with av.open(str(self.video_path)) as container:
        stream = container.streams.video[0]
        first_pts = stream.start_time
        time_base = stream.time_base
        have_time_base = time_base is not None

        # Start of the stream in seconds; 0.0 when the stream does not report it
        start_ts = 0.0
        if first_pts is not None and have_time_base:
            start_ts = float(first_pts * time_base)

        # End of the stream in pts; None when start or duration is not reported
        end_pts = None
        if first_pts is not None and stream.duration is not None:
            end_pts = first_pts + stream.duration

        # End of the stream in seconds; 0.0 when it cannot be derived
        end_ts = 0.0
        if end_pts is not None and have_time_base:
            end_ts = float(end_pts * time_base)

        # The input file itself is returned as the segment; no new file is written
        yield {
            'segment_start': start_ts,
            'segment_start_pts': first_pts,
            'segment_end': end_ts,
            'segment_end_pts': end_pts,
            'video_segment': str(self.video_path),
        }
476
+
477
def fast_iter(self) -> Iterator[dict[str, Any]]:
    """Yield segments one at a time, running one ffmpeg clip command per segment.

    Each segment starts where the previous one ended, minus `self.overlap`. The requested length is
    `self.segment_duration` or, when splitting at `self.segment_times`, the distance to the next split
    point; the final segment after the last split point takes the rest of the video. The reported
    times are based on the measured duration of each clip file, not on the requested duration.

    Iteration stops, and the clip just written is deleted, as soon as a clip is no longer than the
    overlap (it adds no new material) or is shorter than `self.min_segment_duration`.

    Yields:
        Dicts with `segment_start`/`segment_end` (seconds), `segment_start_pts`/`segment_end_pts`
        and `video_segment` (path of the clip file).

    Raises:
        pxt.Error: If an ffmpeg invocation exits with a non-zero return code.
    """
    segment_path: str = ''
    assert self.segment_times is None or len(self.segment_times) > 0

    try:
        start_time = 0.0
        start_pts = 0
        segment_idx = 0
        while True:
            target_duration: float | None
            if self.segment_duration is not None:
                target_duration = self.segment_duration
            elif self.segment_times is not None and segment_idx < len(self.segment_times):
                target_duration = self.segment_times[segment_idx] - start_time
            else:
                target_duration = None  # the rest of the video

            segment_path = str(TempStore.create_path(extension='.mp4'))
            cmd = av_utils.ffmpeg_clip_cmd(str(self.video_path), segment_path, start_time, target_duration)
            _ = subprocess.run(cmd, capture_output=True, text=True, check=True)

            # use the actual duration
            segment_duration = av_utils.get_video_duration(segment_path)
            # A clip no longer than the overlap would leave the next start_time at or before the current
            # one, so the loop could never finish. Use <= rather than an exact float equality test so
            # that a trailing clip slightly shorter than the overlap also ends the iteration.
            if segment_duration <= self.overlap or segment_duration < self.min_segment_duration:
                # we're done
                Path(segment_path).unlink()
                return

            segment_end = start_time + segment_duration
            segment_end_pts = start_pts + round(segment_duration / self.video_time_base)
            result = {
                'segment_start': start_time,
                'segment_start_pts': start_pts,
                'segment_end': segment_end,
                'segment_end_pts': segment_end_pts,
                'video_segment': segment_path,
            }
            yield result

            # the next segment begins `overlap` seconds before the end of this one
            start_time = segment_end - self.overlap
            start_pts = segment_end_pts - round(self.overlap / self.video_time_base)

            segment_idx += 1
            if self.segment_times is not None and segment_idx > len(self.segment_times):
                # We've created all segments including the final segment after the last segment_time
                break

    except subprocess.CalledProcessError as e:
        # don't leave a partially written clip behind
        if segment_path and Path(segment_path).exists():
            Path(segment_path).unlink()
        error_msg = f'ffmpeg failed with return code {e.returncode}'
        if e.stderr:
            error_msg += f': {e.stderr.strip()}'
        raise pxt.Error(error_msg) from e
531
+
532
def accurate_iter(self) -> Iterator[dict[str, Any]]:
    """Split the video with a single ffmpeg segment command, then yield the resulting files in order.

    All segment files are written up front under a common temporary base path. Start and end times
    are accumulated from the measured duration of each file. Iteration stops at the first segment
    shorter than `self.min_segment_duration`; that file and every later file that will not be
    yielded are deleted.

    Yields:
        Dicts with `segment_start`/`segment_end` (seconds), `segment_start_pts`/`segment_end_pts`
        and `video_segment` (path of the segment file).

    Raises:
        pxt.Error: If ffmpeg exits with a non-zero return code.
    """
    assert self.segment_times is None or len(self.segment_times) > 0
    base_path = TempStore.create_path(extension='')
    # Use ffmpeg -f segment for accurate segmentation with re-encoding
    output_pattern = f'{base_path}_segment_%04d.mp4'
    cmd = av_utils.ffmpeg_segment_cmd(
        str(self.video_path),
        output_pattern,
        segment_duration=self.segment_duration,
        segment_times=self.segment_times,
        video_encoder=self.video_encoder,
        video_encoder_args=self.video_encoder_args,
    )

    try:
        _ = subprocess.run(cmd, capture_output=True, text=True, check=True)
        # the zero-padded index in the file name makes lexicographic order equal to segment order
        output_paths = sorted(glob.glob(f'{base_path}_segment_*.mp4'))
        # TODO: decide whether ffmpeg producing no output files should be reported as an error
        start_time = 0.0
        start_pts = 0
        for idx, segment_path in enumerate(output_paths):
            segment_duration = av_utils.get_video_duration(segment_path)
            if segment_duration < self.min_segment_duration:
                # Nothing from here on is handed to the caller, so remove this file and all remaining
                # ones instead of leaving them behind.
                for unused_path in output_paths[idx:]:
                    Path(unused_path).unlink(missing_ok=True)
                return

            segment_pts = round(segment_duration / self.video_time_base)
            result = {
                'segment_start': start_time,
                'segment_start_pts': start_pts,
                'segment_end': start_time + segment_duration,
                'segment_end_pts': start_pts + segment_pts,
                'video_segment': segment_path,
            }
            yield result
            start_time += segment_duration
            start_pts += segment_pts

    except subprocess.CalledProcessError as e:
        error_msg = f'ffmpeg failed with return code {e.returncode}'
        if e.stderr:
            error_msg += f': {e.stderr.strip()}'
        raise pxt.Error(error_msg) from e
579
+
580
def __next__(self) -> dict[str, Any]:
    """Return the next segment from the generator chosen in the constructor.

    `StopIteration` from the exhausted generator propagates to the caller.
    """
    next_segment = next(self.output_iter)
    return next_segment
582
+
583
def close(self) -> None:
    """Do nothing; this iterator performs no cleanup on close."""
    return None
585
+
586
@classmethod
@deprecated('create() is deprecated; use `pixeltable.functions.video.video_splitter` instead', version='0.5.6')
def create(cls, **kwargs: Any) -> tuple[type[ComponentIterator], dict[str, Any]]:
    """Deprecated entry point: forwards every keyword argument to the parent class's `_create()`."""
    iterator_spec = super()._create(**kwargs)
    return iterator_spec