pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,15 +1,23 @@
1
+ import glob
1
2
  import logging
2
3
  import math
4
+ import subprocess
3
5
  from fractions import Fraction
4
6
  from pathlib import Path
5
- from typing import Any, Optional
7
+ from typing import Any, Iterator, Literal
6
8
 
7
9
  import av
8
10
  import pandas as pd
9
11
  import PIL.Image
12
+ from av.container import InputContainer
13
+ from deprecated import deprecated
10
14
 
15
+ import pixeltable as pxt
11
16
  import pixeltable.exceptions as excs
12
17
  import pixeltable.type_system as ts
18
+ import pixeltable.utils.av as av_utils
19
+ from pixeltable.env import Env
20
+ from pixeltable.utils.local_store import TempStore
13
21
 
14
22
  from .base import ComponentIterator
15
23
 
@@ -18,41 +26,78 @@ _logger = logging.getLogger('pixeltable')
18
26
 
19
27
  class FrameIterator(ComponentIterator):
20
28
  """
21
- Iterator over frames of a video. At most one of `fps` or `num_frames` may be specified. If `fps` is specified,
22
- then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified, then the
23
- exact number of frames will be extracted. If neither is specified, then all frames will be extracted. The first
24
- frame of the video will always be extracted, and the remaining frames will be spaced as evenly as possible.
29
+ Iterator over frames of a video. At most one of `fps`, `num_frames`, or `keyframes_only` may be specified. If `fps`
30
+ is specified, then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified,
31
+ then the exact number of frames will be extracted. If neither is specified, then all frames will be extracted.
32
+
33
+ If `fps` or `num_frames` is large enough to exceed the native framerate of the video, then all frames will be
34
+ extracted. (Frames will never be duplicated; the maximum number of frames extracted is the total number of frames
35
+ in the video.)
25
36
 
26
37
  Args:
27
- fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
28
- If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
29
- extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
30
- num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
31
- `num_frames` is greater than the number of frames in the video, all frames will be extracted.
38
+ fps: Number of frames to extract per second of video. This may be a fractional value, such as `0.5` (one frame
39
+ per two seconds). The first frame of the video will always be extracted.
40
+ num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible: the video will
41
+ be divided into `num_frames` evenly spaced intervals, and the midpoint of each interval will be used for
42
+ frame extraction.
43
+ keyframes_only: If True, only extract keyframes.
44
+ all_frame_attrs:
45
+ If True, outputs a `pxt.Json` column `frame_attrs` with the following `pyav`-provided attributes
46
+ (for more information, see `pyav`'s documentation on
47
+ [VideoFrame](https://pyav.org/docs/develop/api/video.html#module-av.video.frame) and
48
+ [Frame](https://pyav.org/docs/develop/api/frame.html)):
49
+
50
+ * `index` (`int`)
51
+ * `pts` (`int | None`)
52
+ * `dts` (`int | None`)
53
+ * `time` (`float | None`)
54
+ * `is_corrupt` (`bool`)
55
+ * `key_frame` (`bool`)
56
+ * `pict_type` (`int`)
57
+ * `interlaced_frame` (`bool`)
58
+
59
+ If False, only outputs frame attributes `frame_idx`, `pos_msec`, and `pos_frame` as separate columns.
32
60
  """
33
61
 
34
62
  # Input parameters
35
63
  video_path: Path
36
- fps: Optional[float]
37
- num_frames: Optional[int]
64
+ fps: float | None
65
+ num_frames: int | None
66
+ keyframes_only: bool
67
+ all_frame_attrs: bool
38
68
 
39
69
  # Video info
40
- container: av.container.input.InputContainer
41
- video_framerate: Fraction
70
+ container: InputContainer
42
71
  video_time_base: Fraction
43
- video_frame_count: int
44
- video_start_time: int
72
+ video_start_time: float
73
+ video_duration: float | None
45
74
 
46
- # List of frame indices to be extracted, or None to extract all frames
47
- frames_to_extract: Optional[list[int]]
75
+ # extraction info
76
+ extraction_step: float | None
77
+ next_extraction_time: float | None
48
78
 
49
- # Next frame to extract, as an iterator `pos` index. If `frames_to_extract` is None, this is the same as the
50
- # frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
51
- next_pos: int
79
+ # state
80
+ pos: int
81
+ video_idx: int
82
+ cur_frame: av.VideoFrame | None
52
83
 
53
- def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
54
- if fps is not None and num_frames is not None:
55
- raise excs.Error('At most one of `fps` or `num_frames` may be specified')
84
+ def __init__(
85
+ self,
86
+ video: str,
87
+ *,
88
+ fps: float | None = None,
89
+ num_frames: int | None = None,
90
+ keyframes_only: bool = False,
91
+ all_frame_attrs: bool = False,
92
+ ):
93
+ if int(fps is not None) + int(num_frames is not None) + int(keyframes_only) > 1:
94
+ raise excs.Error('At most one of `fps`, `num_frames` or `keyframes_only` may be specified')
95
+
96
+ if fps is not None and fps < 0.0:
97
+ raise excs.Error('`fps` must be a non-negative number')
98
+
99
+ if fps == 0.0:
100
+ fps = None # treat 0.0 as unspecified
56
101
 
57
102
  video_path = Path(video)
58
103
  assert video_path.exists() and video_path.is_file()
@@ -60,54 +105,57 @@ class FrameIterator(ComponentIterator):
60
105
  self.container = av.open(str(video_path))
61
106
  self.fps = fps
62
107
  self.num_frames = num_frames
108
+ self.keyframes_only = keyframes_only
109
+ self.all_frame_attrs = all_frame_attrs
63
110
 
64
- self.video_framerate = self.container.streams.video[0].average_rate
65
111
  self.video_time_base = self.container.streams.video[0].time_base
66
- self.video_start_time = self.container.streams.video[0].start_time or 0
67
-
68
- # Determine the number of frames in the video
69
- self.video_frame_count = self.container.streams.video[0].frames
70
- if self.video_frame_count == 0:
71
- # The video codec does not provide a frame count in the standard `frames` field. Try some other methods.
72
- metadata: dict = self.container.streams.video[0].metadata
73
- if 'NUMBER_OF_FRAMES' in metadata:
74
- self.video_frame_count = int(metadata['NUMBER_OF_FRAMES'])
75
- elif 'DURATION' in metadata:
76
- # As a last resort, calculate the frame count from the stream duration.
77
- duration = metadata['DURATION']
78
- assert isinstance(duration, str)
79
- seconds = pd.to_timedelta(duration).total_seconds()
80
- # Usually the duration and framerate are precise enough for this calculation to be accurate, but if
81
- # we encounter a case where it's off by one due to a rounding error, that's ok; we only use this
82
- # to determine the positions of the sampled frames when `fps` or `num_frames` is specified.
83
- self.video_frame_count = round(seconds * self.video_framerate)
84
- else:
85
- raise excs.Error(f'Video {video}: failed to get number of frames')
86
112
 
87
- if num_frames is not None:
88
- # specific number of frames
89
- if num_frames > self.video_frame_count:
90
- # Extract all frames
91
- self.frames_to_extract = None
113
+ start_time = self.container.streams.video[0].start_time or 0
114
+ self.video_start_time = float(start_time * self.video_time_base)
115
+
116
+ duration_pts: int | None = self.container.streams.video[0].duration
117
+ if duration_pts is not None:
118
+ self.video_duration = float(duration_pts * self.video_time_base)
119
+ else:
120
+ # As a backup, try to calculate duration from DURATION metadata field
121
+ metadata = self.container.streams.video[0].metadata
122
+ duration_field = metadata.get('DURATION') # A string like "00:01:23"
123
+ if duration_field is not None:
124
+ assert isinstance(duration_field, str)
125
+ self.video_duration = pd.to_timedelta(duration_field).total_seconds()
92
126
  else:
93
- spacing = float(self.video_frame_count) / float(num_frames)
94
- self.frames_to_extract = [round(i * spacing) for i in range(num_frames)]
95
- assert len(self.frames_to_extract) == num_frames
96
- elif fps is None or fps == 0.0:
97
- # Extract all frames
98
- self.frames_to_extract = None
99
- elif fps > float(self.video_framerate):
100
- raise excs.Error(
101
- f'Video {video}: requested fps ({fps}) exceeds that of the video ({float(self.video_framerate)})'
102
- )
127
+ # TODO: Anything we can do here? Other methods of determining the duration are expensive and
128
+ # not so appropriate for an iterator initializer.
129
+ self.video_duration = None
130
+
131
+ if self.video_duration is None and self.num_frames is not None:
132
+ raise excs.Error(f'Could not determine duration of video: {video}')
133
+
134
+ # If self.fps or self.num_frames is specified, we cannot rely on knowing in advance which frame positions will
135
+ # be needed, since for variable framerate videos we do not know in advance the precise timestamp of each frame.
136
+ # The strategy is: predetermine a list of "extraction times", the idealized timestamps of the frames we want to
137
+ # materialize. As we later iterate through the frames, we will choose the frames that are closest to these
138
+ # idealized timestamps.
139
+
140
+ self.pos = 0
141
+ self.video_idx = 0
142
+ if self.num_frames is not None:
143
+ # Divide the video duration into num_frames evenly spaced intervals. The extraction times are the midpoints
144
+ # of those intervals.
145
+ self.extraction_step = (self.video_duration - self.video_start_time) / self.num_frames
146
+ self.next_extraction_time = self.video_start_time + self.extraction_step / 2
147
+ elif self.fps is not None:
148
+ self.extraction_step = 1 / self.fps
149
+ self.next_extraction_time = self.video_start_time
103
150
  else:
104
- # Extract frames at the implied frequency
105
- freq = fps / float(self.video_framerate)
106
- n = math.ceil(self.video_frame_count * freq) # number of frames to extract
107
- self.frames_to_extract = [round(i / freq) for i in range(n)]
151
+ self.extraction_step = None
152
+ self.next_extraction_time = None
108
153
 
109
- _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames}')
110
- self.next_pos = 0
154
+ _logger.debug(
155
+ f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames} '
156
+ f'keyframes_only={self.keyframes_only}'
157
+ )
158
+ self.cur_frame = self.next_frame()
111
159
 
112
160
  @classmethod
113
161
  def input_schema(cls) -> dict[str, ts.ColumnType]:
@@ -115,72 +163,427 @@ class FrameIterator(ComponentIterator):
115
163
  'video': ts.VideoType(nullable=False),
116
164
  'fps': ts.FloatType(nullable=True),
117
165
  'num_frames': ts.IntType(nullable=True),
166
+ 'keyframes_only': ts.BoolType(nullable=False),
167
+ 'all_frame_attrs': ts.BoolType(nullable=False),
118
168
  }
119
169
 
120
170
  @classmethod
121
171
  def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
122
- return {
123
- 'frame_idx': ts.IntType(),
124
- 'pos_msec': ts.FloatType(),
125
- 'pos_frame': ts.IntType(),
126
- 'frame': ts.ImageType(),
127
- }, ['frame']
172
+ attrs: dict[str, ts.ColumnType]
173
+ fps = kwargs.get('fps')
174
+ if fps is not None and (not isinstance(fps, (int, float)) or fps < 0.0):
175
+ raise excs.Error('`fps` must be a non-negative number')
128
176
 
129
- def __next__(self) -> dict[str, Any]:
130
- # Determine the frame index in the video corresponding to the iterator index `next_pos`;
131
- # the frame at this index is the one we want to extract next
132
- if self.frames_to_extract is None:
133
- next_video_idx = self.next_pos # we're extracting all frames
134
- elif self.next_pos >= len(self.frames_to_extract):
135
- raise StopIteration
177
+ if kwargs.get('all_frame_attrs'):
178
+ attrs = {'frame_attrs': ts.JsonType()}
136
179
  else:
137
- next_video_idx = self.frames_to_extract[self.next_pos]
138
-
139
- # We are searching for the frame at the index implied by `next_pos`. Step through the video until we
140
- # find it. There are two reasons why it might not be the immediate next frame in the video:
141
- # (1) `fps` or `num_frames` was specified as an iterator argument; or
142
- # (2) we just did a seek, and the desired frame is not a keyframe.
143
- # TODO: In case (1) it will usually be fastest to step through the frames until we find the one we're
144
- # looking for. But in some cases it may be faster to do a seek; for example, when `fps` is very
145
- # low and there are multiple keyframes in between each frame we want to extract (imagine extracting
146
- # 10 frames from an hourlong video).
180
+ attrs = {'frame_idx': ts.IntType(), 'pos_msec': ts.FloatType(), 'pos_frame': ts.IntType()}
181
+ return {**attrs, 'frame': ts.ImageType()}, ['frame']
182
+
183
+ def next_frame(self) -> av.VideoFrame | None:
184
+ try:
185
+ return next(self.container.decode(video=0))
186
+ except EOFError:
187
+ return None
188
+
189
+ def __next__(self) -> dict[str, Any]:
147
190
  while True:
148
- try:
149
- frame = next(self.container.decode(video=0))
150
- except EOFError:
151
- raise StopIteration from None
152
- # Compute the index of the current frame in the video based on the presentation timestamp (pts);
153
- # this ensures we have a canonical understanding of frame index, regardless of how we got here
154
- # (seek or iteration)
155
- pts = frame.pts - self.video_start_time
156
- video_idx = round(pts * self.video_time_base * self.video_framerate)
157
- assert isinstance(video_idx, int)
158
- if video_idx < next_video_idx:
159
- # We haven't reached the desired frame yet
191
+ if self.cur_frame is None:
192
+ raise StopIteration
193
+
194
+ next_frame = self.next_frame()
195
+
196
+ if self.keyframes_only and not self.cur_frame.key_frame:
197
+ self.cur_frame = next_frame
198
+ self.video_idx += 1
160
199
  continue
161
200
 
162
- # Sanity check that we're at the right frame.
163
- if video_idx != next_video_idx:
164
- raise excs.Error(f'Frame {next_video_idx} is missing from the video (video file is corrupt)')
165
- img = frame.to_image()
201
+ cur_frame_pts = self.cur_frame.pts
202
+ cur_frame_time = float(cur_frame_pts * self.video_time_base)
203
+
204
+ if self.extraction_step is not None:
205
+ # We are targeting a specified list of extraction times (because fps or num_frames was specified).
206
+ assert self.next_extraction_time is not None
207
+
208
+ if next_frame is None:
209
+ # cur_frame is the last frame of the video. If it is before the next extraction time, then we
210
+ # have reached the end of the video.
211
+ if cur_frame_time < self.next_extraction_time:
212
+ raise StopIteration
213
+ else:
214
+ # The extraction time represents the idealized timestamp of the next frame we want to extract.
215
+ # If next_frame is *closer* to it than cur_frame, then we skip cur_frame.
216
+ # The following logic handles all three cases:
217
+ # - next_extraction_time is before cur_frame_time (never skips)
218
+ # - next_extraction_time is after next_frame_time (always skips)
219
+ # - next_extraction_time is between cur_frame_time and next_frame_time (depends on which is closer)
220
+ next_frame_pts = next_frame.pts
221
+ next_frame_time = float(next_frame_pts * self.video_time_base)
222
+ if next_frame_time - self.next_extraction_time < self.next_extraction_time - cur_frame_time:
223
+ self.cur_frame = next_frame
224
+ self.video_idx += 1
225
+ continue
226
+
227
+ img = self.cur_frame.to_image()
166
228
  assert isinstance(img, PIL.Image.Image)
167
- pos_msec = float(pts * self.video_time_base * 1000)
168
- result = {'frame_idx': self.next_pos, 'pos_msec': pos_msec, 'pos_frame': video_idx, 'frame': img}
169
- self.next_pos += 1
229
+ result: dict[str, Any] = {'frame': img}
230
+ if self.all_frame_attrs:
231
+ attrs = {
232
+ 'index': self.video_idx,
233
+ 'pts': cur_frame_pts,
234
+ 'dts': self.cur_frame.dts,
235
+ 'time': float(cur_frame_pts * self.video_time_base),
236
+ 'is_corrupt': self.cur_frame.is_corrupt,
237
+ 'key_frame': self.cur_frame.key_frame,
238
+ 'pict_type': self.cur_frame.pict_type,
239
+ 'interlaced_frame': self.cur_frame.interlaced_frame,
240
+ }
241
+ result['frame_attrs'] = attrs
242
+ else:
243
+ pos_msec = float(cur_frame_pts * self.video_time_base * 1000 - self.video_start_time)
244
+ result.update({'frame_idx': self.pos, 'pos_msec': pos_msec, 'pos_frame': self.video_idx})
245
+
246
+ self.cur_frame = next_frame
247
+ self.video_idx += 1
248
+
249
+ self.pos += 1
250
+ if self.extraction_step is not None:
251
+ self.next_extraction_time += self.extraction_step
252
+
170
253
  return result
171
254
 
172
255
  def close(self) -> None:
173
256
  self.container.close()
174
257
 
175
- def set_pos(self, pos: int) -> None:
176
- if pos == self.next_pos:
177
- return # already there
178
-
179
- video_idx = pos if self.frames_to_extract is None else self.frames_to_extract[pos]
180
- _logger.debug(f'seeking to frame number {video_idx} (at iterator index {pos})')
181
- # compute the frame position in time_base units
182
- seek_pos = int(video_idx / self.video_framerate / self.video_time_base + self.video_start_time)
183
- # This will seek to the nearest keyframe before the desired frame. If the frame being sought is not a keyframe,
184
- # then the iterator will step forward to the desired frame on the subsequent call to next().
185
- self.container.seek(seek_pos, backward=True, stream=self.container.streams.video[0])
186
- self.next_pos = pos
258
+ def set_pos(self, pos: int, **kwargs: Any) -> None:
259
+ assert next(iter(kwargs.values()), None) is not None
260
+
261
+ if self.pos == pos:
262
+ # Nothing to do
263
+ return
264
+
265
+ self.pos = pos
266
+
267
+ seek_time: float
268
+ if 'pos_msec' in kwargs:
269
+ self.video_idx = kwargs['pos_frame']
270
+ seek_time = kwargs['pos_msec'] / 1000.0 + self.video_start_time
271
+ else:
272
+ assert 'frame_attrs' in kwargs
273
+ self.video_idx = kwargs['frame_attrs']['index']
274
+ seek_time = kwargs['frame_attrs']['time']
275
+
276
+ assert isinstance(self.video_idx, int)
277
+ assert isinstance(seek_time, float)
278
+
279
+ seek_pts = math.floor(seek_time / self.video_time_base)
280
+ self.container.seek(seek_pts, backward=True, stream=self.container.streams.video[0])
281
+
282
+ self.cur_frame = self.next_frame()
283
+ while self.cur_frame is not None and float(self.cur_frame.pts * self.video_time_base) < seek_time - 1e-3:
284
+ self.cur_frame = self.next_frame()
285
+ assert self.cur_frame is None or abs(float(self.cur_frame.pts * self.video_time_base) - seek_time) < 1e-3
286
+
287
+ @classmethod
288
+ @deprecated('create() is deprecated; use `pixeltable.functions.video.frame_iterator` instead', version='0.5.6')
289
+ def create(cls, **kwargs: Any) -> tuple[type[ComponentIterator], dict[str, Any]]:
290
+ return super()._create(**kwargs)
291
+
292
+
293
+ class VideoSplitter(ComponentIterator):
294
+ """
295
+ Iterator over segments of a video file, which is split into segments. The segments are specified either via a
296
+ fixed duration or a list of split points.
297
+
298
+ Args:
299
+ duration: Video segment duration in seconds
300
+ overlap: Overlap between consecutive segments in seconds. Only available for `mode='fast'`.
301
+ min_segment_duration: Drop the last segment if it is smaller than min_segment_duration.
302
+ segment_times: List of timestamps (in seconds) in video where segments should be split. Note that these are not
303
+ segment durations. If all segment times are less than the duration of the video, produces exactly
304
+ `len(segment_times) + 1` segments. An argument of `[]` will produce a single segment containing the
305
+ entire video.
306
+ mode: Segmentation mode:
307
+ - `'fast'`: Quick segmentation using stream copy (splits only at keyframes, approximate durations)
308
+ - `'accurate'`: Precise segmentation with re-encoding (exact durations, slower)
309
+ video_encoder: Video encoder to use. If not specified, uses the default encoder for the current platform.
310
+ Only available for `mode='accurate'`.
311
+ video_encoder_args: Additional arguments to pass to the video encoder. Only available for `mode='accurate'`.
312
+ """
313
+
314
+ # Input parameters
315
+ video_path: Path
316
+ segment_duration: float | None
317
+ segment_times: list[float] | None # [] is valid
318
+ overlap: float
319
+ min_segment_duration: float
320
+ video_encoder: str | None
321
+ video_encoder_args: dict[str, Any] | None
322
+
323
+ # Video metadata
324
+ video_time_base: Fraction
325
+
326
+ output_iter: Iterator[dict[str, Any]]
327
+
328
def __init__(
    self,
    video: str,
    *,
    duration: float | None = None,
    overlap: float | None = None,
    min_segment_duration: float | None = None,
    segment_times: list[float] | None = None,
    mode: Literal['fast', 'accurate'] = 'accurate',
    video_encoder: str | None = None,
    video_encoder_args: dict[str, Any] | None = None,
):
    """Validate the arguments, store them on the instance and select the segment generator.

    Args:
        video: Path of the video file to split; must point to an existing regular file.
        duration: Segment duration in seconds. Exactly one of `duration` and `segment_times` must be given.
        overlap: Overlap between consecutive segments in seconds; stored as 0.0 when omitted.
        min_segment_duration: Minimum duration of a segment in seconds; stored as 0.0 when omitted.
        segment_times: Split points in seconds; `[]` selects the single-segment generator.
        mode: `'fast'` selects `fast_iter()`, any other value selects `accurate_iter()`.
        video_encoder: Stored as-is and handed to the ffmpeg command line by `accurate_iter()`.
        video_encoder_args: Stored as-is and handed to the ffmpeg command line by `accurate_iter()`.

    Raises:
        excs.Error: If `_check_args()` rejects the argument combination.
    """
    Env.get().require_binary('ffmpeg')
    self._check_args(
        duration, segment_times, overlap, min_segment_duration, mode, video_encoder, video_encoder_args
    )
    # _check_args() has already rejected invalid combinations; the asserts restate its guarantees
    assert (duration is not None) != (segment_times is not None)
    if duration is not None:
        assert duration > 0.0
        # min_segment_duration defaults to None, which cannot be ordered against a float
        assert min_segment_duration is None or duration >= min_segment_duration
        assert overlap is None or overlap < duration

    video_path = Path(video)
    assert video_path.exists() and video_path.is_file()

    self.video_path = video_path
    self.segment_duration = duration
    self.overlap = overlap if overlap is not None else 0.0
    self.min_segment_duration = min_segment_duration if min_segment_duration is not None else 0.0
    self.segment_times = segment_times
    self.video_encoder = video_encoder
    self.video_encoder_args = video_encoder_args

    # The *_iter() methods are generator functions: calling them does not run their bodies yet, so it is
    # fine that video_time_base is only assigned further down.
    if self.segment_times is not None and len(self.segment_times) == 0:
        self.output_iter = self.complete_video_iter()
    else:
        self.output_iter = self.fast_iter() if mode == 'fast' else self.accurate_iter()

    with av.open(str(video_path)) as container:
        self.video_time_base = container.streams.video[0].time_base

    # TODO: check types of args
370
+
371
@classmethod
def input_schema(cls) -> dict[str, ts.ColumnType]:
    """Return the column type of every constructor argument, keyed by argument name."""
    schema: dict[str, ts.ColumnType] = {'video': ts.VideoType(nullable=False)}
    # the optional numeric arguments all share the same nullable float type
    for float_arg in ('duration', 'overlap', 'min_segment_duration'):
        schema[float_arg] = ts.FloatType(nullable=True)
    schema['segment_times'] = ts.JsonType(nullable=True)
    schema['mode'] = ts.StringType(nullable=False)
    schema['video_encoder'] = ts.StringType(nullable=True)
    schema['video_encoder_args'] = ts.JsonType(nullable=True)
    return schema
383
+
384
@classmethod
def _check_args(
    cls,
    segment_duration: Any,
    segment_times: Any,
    overlap: Any,
    min_segment_duration: Any,
    mode: Any,
    video_encoder: Any,
    video_encoder_args: Any,
) -> None:
    """Reject argument combinations that the splitter does not support.

    The numeric range checks only run for values that are plain `int`/`float` instances; any other
    non-None value passes through unchecked (presumably to tolerate non-literal arguments -- confirm
    against the callers).

    Raises:
        excs.Error: If neither or both of duration/segment_times are given, if overlap is combined with
            segment_times or with `mode='accurate'`, if a numeric duration is not positive, is below
            min_segment_duration or does not exceed overlap, or if encoder settings are given for
            `mode='fast'`.
    """
    numeric = (int, float)
    has_duration = segment_duration is not None
    has_times = segment_times is not None

    # exactly one way of describing the segments must be chosen
    if not has_duration and not has_times:
        raise excs.Error('Must specify either duration or segment_times')
    if has_duration and has_times:
        raise excs.Error('duration and segment_times cannot both be specified')
    if has_times and overlap is not None:
        raise excs.Error('overlap cannot be specified with segment_times')

    # isinstance() is False for None, so no separate None test is needed
    if isinstance(segment_duration, numeric):
        if segment_duration <= 0.0:
            raise excs.Error(f'duration must be a positive number: {segment_duration}')
        if isinstance(min_segment_duration, numeric) and segment_duration < min_segment_duration:
            raise excs.Error(
                f'duration must be at least min_segment_duration: {segment_duration} < {min_segment_duration}'
            )
        if isinstance(overlap, numeric) and overlap >= segment_duration:
            raise excs.Error(f'overlap must be less than duration: {overlap} >= {segment_duration}')

    # mode-specific restrictions
    if mode == 'accurate' and overlap is not None:
        raise excs.Error("Cannot specify overlap for mode='accurate'")
    if mode == 'fast':
        if video_encoder is not None:
            raise excs.Error("Cannot specify video_encoder for mode='fast'")
        if video_encoder_args is not None:
            raise excs.Error("Cannot specify video_encoder_args for mode='fast'")
421
+
422
@classmethod
def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
    """Validate the iterator arguments and describe the columns each output row carries.

    Positional arguments are matched, in order, to `duration`, `overlap`, `min_segment_duration` and
    `segment_times`; keyword arguments override them. `mode` falls back to `'accurate'` when absent.

    Returns:
        The output column types keyed by column name, paired with an empty list.

    Raises:
        excs.Error: If `_check_args()` rejects the argument combination.
    """
    positional_names = ('duration', 'overlap', 'min_segment_duration', 'segment_times')
    # later entries win, so keyword arguments take precedence over positional ones
    given: dict[str, Any] = {**dict(zip(positional_names, args)), **kwargs}

    cls._check_args(
        given.get('duration'),
        given.get('segment_times'),
        given.get('overlap'),
        given.get('min_segment_duration'),
        given.get('mode', 'accurate'),
        given.get('video_encoder'),
        given.get('video_encoder_args'),
    )

    columns: dict[str, ts.ColumnType] = {
        'segment_start': ts.FloatType(nullable=True),
        'segment_start_pts': ts.IntType(nullable=True),
        'segment_end': ts.FloatType(nullable=True),
        'segment_end_pts': ts.IntType(nullable=True),
        'video_segment': ts.VideoType(nullable=False),
    }
    return columns, []
446
+
447
def complete_video_iter(self) -> Iterator[dict[str, Any]]:
    """Yield a single row that covers the whole input video; the original file is used as the segment."""
    assert len(self.segment_times) == 0

    with av.open(str(self.video_path)) as container:
        stream = container.streams.video[0]
        first_pts = stream.start_time
        time_base = stream.time_base

        # timestamps in seconds fall back to 0.0 whenever the stream metadata is incomplete
        if first_pts is not None and time_base is not None:
            start_ts = float(first_pts * time_base)
        else:
            start_ts = 0.0

        end_pts = None
        if first_pts is not None and stream.duration is not None:
            end_pts = first_pts + stream.duration

        # NOTE(review): with an unknown end_pts this reports segment_end == 0.0 rather than None -- confirm
        # that this is intended
        end_ts = 0.0
        if end_pts is not None and time_base is not None:
            end_ts = float(end_pts * time_base)

        yield {
            'segment_start': start_ts,
            'segment_start_pts': first_pts,
            'segment_end': end_ts,
            'segment_end_pts': end_pts,
            'video_segment': str(self.video_path),
        }
476
+
477
def fast_iter(self) -> Iterator[dict[str, Any]]:
    """Yield segments that are cut one at a time with the `av_utils.ffmpeg_clip_cmd()` command line.

    Each clip starts where the previous one ended, minus `self.overlap`. The reported start/end values
    are derived from the measured duration of each produced clip, not from the requested duration.
    Iteration stops at the first clip that is no longer than the overlap or shorter than
    `self.min_segment_duration`, or, when `segment_times` is used, after the clip that follows the last
    split point.

    Yields:
        A dict with `segment_start`/`segment_end` (seconds), `segment_start_pts`/`segment_end_pts`
        (in units of `self.video_time_base`) and `video_segment` (path of the clip).

    Raises:
        pxt.Error: If the ffmpeg command exits with a non-zero return code.
    """
    segment_path: str = ''
    assert self.segment_times is None or len(self.segment_times) > 0

    try:
        start_time = 0.0
        start_pts = 0
        segment_idx = 0
        while True:
            target_duration: float | None
            if self.segment_duration is not None:
                target_duration = self.segment_duration
            elif self.segment_times is not None and segment_idx < len(self.segment_times):
                target_duration = self.segment_times[segment_idx] - start_time
            else:
                target_duration = None  # the rest of the video

            segment_path = str(TempStore.create_path(extension='.mp4'))
            cmd = av_utils.ffmpeg_clip_cmd(str(self.video_path), segment_path, start_time, target_duration)
            _ = subprocess.run(cmd, capture_output=True, text=True, check=True)

            # use the actual duration
            segment_duration = av_utils.get_video_duration(segment_path)
            # A clip that is no longer than the overlap would not move start_time forward, so it marks the
            # end of the video. Using <= instead of an exact float comparison guarantees termination even
            # when the measured duration is slightly below the overlap.
            if segment_duration <= self.overlap or segment_duration < self.min_segment_duration:
                # we're done
                Path(segment_path).unlink()
                return

            segment_end = start_time + segment_duration
            segment_end_pts = start_pts + round(segment_duration / self.video_time_base)
            result = {
                'segment_start': start_time,
                'segment_start_pts': start_pts,
                'segment_end': segment_end,
                'segment_end_pts': segment_end_pts,
                'video_segment': segment_path,
            }
            yield result

            # the next clip begins `overlap` seconds before the end of this one
            start_time = segment_end - self.overlap
            start_pts = segment_end_pts - round(self.overlap / self.video_time_base)

            segment_idx += 1
            if self.segment_times is not None and segment_idx > len(self.segment_times):
                # We've created all segments including the final segment after the last segment_time
                break

    except subprocess.CalledProcessError as e:
        # don't leave a partially written clip behind
        if segment_path and Path(segment_path).exists():
            Path(segment_path).unlink()
        error_msg = f'ffmpeg failed with return code {e.returncode}'
        if e.stderr:
            error_msg += f': {e.stderr.strip()}'
        raise pxt.Error(error_msg) from e
531
+
532
def accurate_iter(self) -> Iterator[dict[str, Any]]:
    """Yield segments produced by a single run of the `av_utils.ffmpeg_segment_cmd()` command line.

    All segment files are written up front; they are then reported in sorted filename order, with
    start/end values accumulated from the measured duration of each file. Iteration stops at the first
    segment shorter than `self.min_segment_duration`; that file and every file after it are deleted,
    because none of them will be handed to the caller.

    Yields:
        A dict with `segment_start`/`segment_end` (seconds), `segment_start_pts`/`segment_end_pts`
        (in units of `self.video_time_base`) and `video_segment` (path of the segment file).

    Raises:
        pxt.Error: If the ffmpeg command exits with a non-zero return code.
    """
    assert self.segment_times is None or len(self.segment_times) > 0
    base_path = TempStore.create_path(extension='')
    # Use ffmpeg -f segment for accurate segmentation with re-encoding
    output_pattern = f'{base_path}_segment_%04d.mp4'
    cmd = av_utils.ffmpeg_segment_cmd(
        str(self.video_path),
        output_pattern,
        segment_duration=self.segment_duration,
        segment_times=self.segment_times,
        video_encoder=self.video_encoder,
        video_encoder_args=self.video_encoder_args,
    )

    try:
        _ = subprocess.run(cmd, capture_output=True, text=True, check=True)
        output_paths = sorted(glob.glob(f'{base_path}_segment_*.mp4'))
        # TODO: decide whether an empty output_paths should be reported as an error
        start_time = 0.0
        start_pts = 0
        for idx, segment_path in enumerate(output_paths):
            segment_duration = av_utils.get_video_duration(segment_path)
            if segment_duration < self.min_segment_duration:
                # Iteration ends here, so this file and all remaining ones are never yielded: remove
                # them instead of leaving orphaned temp files behind.
                for unused_path in output_paths[idx:]:
                    Path(unused_path).unlink(missing_ok=True)
                return

            duration_pts = round(segment_duration / self.video_time_base)
            result = {
                'segment_start': start_time,
                'segment_start_pts': start_pts,
                'segment_end': start_time + segment_duration,
                'segment_end_pts': start_pts + duration_pts,
                'video_segment': segment_path,
            }
            yield result
            start_time += segment_duration
            start_pts += duration_pts

    except subprocess.CalledProcessError as e:
        error_msg = f'ffmpeg failed with return code {e.returncode}'
        if e.stderr:
            error_msg += f': {e.stderr.strip()}'
        raise pxt.Error(error_msg) from e
579
+
580
+ def __next__(self) -> dict[str, Any]:
581
+ return next(self.output_iter)
582
+
583
def close(self) -> None:
    """Do nothing: this method performs no cleanup."""
    return None
585
+
586
@classmethod
@deprecated('create() is deprecated; use `pixeltable.functions.video.video_splitter` instead', version='0.5.6')
def create(cls, **kwargs: Any) -> tuple[type[ComponentIterator], dict[str, Any]]:
    """Deprecated entry point; forwards all keyword arguments to the parent class's `_create()`."""
    created = super()._create(**kwargs)
    return created