mvid 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mvid-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.3
2
+ Name: mvid
3
+ Version: 0.1.0
4
+ Summary: Simple video reading and writing
5
+ Author: Adam Alcolado
6
+ Author-email: Adam Alcolado <adam.alcolado@mtl.ai>
7
+ Requires-Dist: av
8
+ Requires-Dist: numpy
9
+ Requires-Python: >=3.13
10
+ Description-Content-Type: text/markdown
11
+
12
+ # mvid
13
+ mvid is a simple library to treat video as a sequence (e.g. as a list) of NumPy arrays.
14
+
15
+ ```python
16
+ from mvid import Video
17
+
18
+ with Video("myvideo.mp4") as video:
19
+ # get the number of frames
20
+ print(len(video))
21
+
22
+ # random access
23
+ frame = video[57]
24
+
25
+ # iterate over all frames in the video
26
+ for frame in video:
27
+ pass
28
+ ```
29
+ It is built on top of PyAV (with minimal to no overhead) and abstracts away seeking and timing logic.
30
+
31
+ # Requirements
32
+ Requires [PyAV](https://pyav.basswood-io.com/docs/stable/) and NumPy.
33
+
34
+ # How it works
35
+ Frame lookup is based on decoding from the nearest preceding keyframe up to the requested index.
36
+ We determine that index using each frame’s timestamp together with the stream’s frame rate.
37
+ This approach works well for videos with consistent timing metadata, but not all files follow those assumptions.
38
+ Some containers use variable frame rates or contain incomplete or inconsistent timestamps. In those cases
39
+ there is no reliable way to infer a stable frame index without first scanning every frame and assigning
40
+ indices explicitly. Rather than performing that preprocessing step, we intentionally crash when encountering
41
+ timing metadata that cannot be interpreted unambiguously.
42
+
43
+ # Performance
44
+ Generally speaking, sequential access is as fast as possible thanks to PyAV. Check `benchmark.py` and compare
45
+ with `ffmpeg -i <my_video> -f null -`. The benchmarking script will also try random access and various
46
+ thread parameters so you can see what performance to expect.
47
+
48
+ There is overhead from conversion to NumPy arrays. We also provide a more "raw" AVVideo class that
49
+ performs all the bookkeeping without NumPy conversion.
50
+
51
+ # Related projects
52
+ [torchcodec](https://github.com/meta-pytorch/torchcodec) is a more heavy-duty library that returns PyTorch tensors.
53
+ It also has index-based access (among other options). It requires managing your installation of ffmpeg.
mvid-0.1.0/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # mvid
2
+ mvid is a simple library to treat video as a sequence (e.g. as a list) of NumPy arrays.
3
+
4
+ ```python
5
+ from mvid import Video
6
+
7
+ with Video("myvideo.mp4") as video:
8
+ # get the number of frames
9
+ print(len(video))
10
+
11
+ # random access
12
+ frame = video[57]
13
+
14
+ # iterate over all frames in the video
15
+ for frame in video:
16
+ pass
17
+ ```
18
+ It is built on top of PyAV (with minimal to no overhead) and abstracts away seeking and timing logic.
19
+
20
+ # Requirements
21
+ Requires [PyAV](https://pyav.basswood-io.com/docs/stable/) and NumPy.
22
+
23
+ # How it works
24
+ Frame lookup is based on decoding from the nearest preceding keyframe up to the requested index.
25
+ We determine that index using each frame’s timestamp together with the stream’s frame rate.
26
+ This approach works well for videos with consistent timing metadata, but not all files follow those assumptions.
27
+ Some containers use variable frame rates or contain incomplete or inconsistent timestamps. In those cases
28
+ there is no reliable way to infer a stable frame index without first scanning every frame and assigning
29
+ indices explicitly. Rather than performing that preprocessing step, we intentionally crash when encountering
30
+ timing metadata that cannot be interpreted unambiguously.
31
+
32
+ # Performance
33
+ Generally speaking, sequential access is as fast as possible thanks to PyAV. Check `benchmark.py` and compare
34
+ with `ffmpeg -i <my_video> -f null -`. The benchmarking script will also try random access and various
35
+ thread parameters so you can see what performance to expect.
36
+
37
+ There is overhead from conversion to NumPy arrays. We also provide a more "raw" AVVideo class that
38
+ performs all the bookkeeping without NumPy conversion.
39
+
40
+ # Related projects
41
+ [torchcodec](https://github.com/meta-pytorch/torchcodec) is a more heavy-duty library that returns PyTorch tensors.
42
+ It also has index-based access (among other options). It requires managing your installation of ffmpeg.
@@ -0,0 +1,25 @@
1
+ [project]
2
+ name = "mvid"
3
+ version = "0.1.0"
4
+ description = "Simple video reading and writing"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Adam Alcolado", email = "adam.alcolado@mtl.ai" }
8
+ ]
9
+ requires-python = ">=3.13"
10
+ dependencies = [
11
+ "av",
12
+ "numpy"
13
+ ]
14
+
15
+ [dependency-groups]
16
+ dev = [
17
+ "pytest>=9.0.2",
18
+ "ruff>=0.15.0",
19
+ "tqdm",
20
+ "pillow"
21
+ ]
22
+
23
+ [build-system]
24
+ requires = ["uv_build>=0.9.7,<0.10.0"]
25
+ build-backend = "uv_build"
@@ -0,0 +1,239 @@
1
+ from typing import Generator, Sequence
2
+
3
+ import av
4
+ import numpy as np
5
+
6
+
7
+ class AVVideo(Sequence[av.VideoFrame]):
8
+ """
9
+ This is the "raw" PyAV version of the Video class. It returns PyAV Frame objects.
10
+
11
+ See Video docs for more information about usage.
12
+
13
+ This class takes care of all the necessary seeking and bookkeeping.
14
+
15
+ The main idea is to seek to the nearest keyframe and decode all the frames until we reach the target frame index.
16
+ In the case that we will simply access the next frame, we hold on to the container.decode() context in a generator
17
+ so that we don't need to seek and repeat decoding packets.
18
+ """
19
+
20
+ def __init__(
21
+ self,
22
+ path,
23
+ video_stream_id=0,
24
+ thread_type="SLICE",
25
+ thread_count=0,
26
+ ):
27
+ """
28
+ Initialize AVVideo class.
29
+
30
+ :param path: path to video file
31
+ :param video_stream_id: id of video in container (i.e. 0 is the first video stream)
32
+ :param thread_type: 'SLICE' or 'FRAME', or 'AUTO'
33
+ see https://pyav.basswood-io.com/docs/develop/api/codec.html#av.codec.context.ThreadType,
34
+ and https://pyav.basswood-io.com/docs/stable/cookbook/basics.html#threading
35
+ :param thread_count: number of threads to use (0 is auto)
36
+
37
+ The best thread type to use depends on the way the video is encoded and your access pattern.
38
+ """
39
+
40
+ if thread_type not in ("SLICE", "FRAME", "AUTO"):
41
+ raise ValueError(
42
+ f"thread_type '{thread_type}' is not 'SLICE', 'FRAME', or 'AUTO'"
43
+ )
44
+
45
+ container: av.container.InputContainer = av.open(path)
46
+ stream: av.video.stream.VideoStream = container.streams.video[video_stream_id]
47
+ stream.thread_type = thread_type
48
+ stream.thread_count = thread_count
49
+
50
+ self._container = container
51
+ self._stream = stream
52
+ self._next_frame_idx = 0
53
+ self._generator = self._create_generator()
54
+
55
+ AVVideo._verify_timing(stream)
56
+
57
+ @staticmethod
58
+ def _verify_timing(stream):
59
+ """
60
+ Verify that the stream metadata satisfies our assumptions about timing
61
+ see https://pyav.basswood-io.com/docs/stable/api/time.html
62
+ """
63
+
64
+ if stream.start_time != 0:
65
+ raise ValueError("Video stream starts at an offset")
66
+
67
+ if stream.frames == 0:
68
+ raise ValueError("Unknown number of frames in the video file")
69
+
70
+ # The stream time_base gives the number of seconds per 'tick'.
71
+ # Each frame has presentation time stamp (PTS) which counts in ticks.
72
+ # The stream base_rate should give the frames per second (FPS) of the video
73
+ # (perhaps guessed_rate would be a good choice to use instead).
74
+ # If we calculate how many ticks are in a frame, this should be an integer.
75
+ # 1 / ticks_per_frame = frames_per_second * seconds_per_tick
76
+ ticks_per_frame = 1 / (stream.base_rate * stream.time_base)
77
+ if ticks_per_frame.denominator != 1:
78
+ raise ValueError(
79
+ f"Ticks per frame ({float(ticks_per_frame)}) is not an integer for this video stream; check your file's timing metadata"
80
+ )
81
+
82
+ # duration in seconds == number of frames / fps
83
+ if stream.duration * stream.time_base != stream.frames / stream.base_rate:
84
+ raise ValueError(
85
+ f"Duration of the video file in seconds is inconsistent with the number of frames; check your file's timing metadata"
86
+ )
87
+
88
+ def close(self):
89
+ self._generator.close()
90
+ self._container.close()
91
+
92
+ def __enter__(self):
93
+ return self
94
+
95
+ def __exit__(self, exc_type, exc_val, exc_tb):
96
+ self.close()
97
+
98
+ def __len__(self):
99
+ return self._stream.frames
100
+
101
+ @staticmethod
102
+ def _create_generator_static(
103
+ container, stream
104
+ ) -> Generator[av.VideoFrame, None, None]:
105
+ # This method is static to avoid circular references which can hog resources.
106
+ for frame in container.decode(stream.index):
107
+ yield frame
108
+
109
+ def _create_generator(self) -> Generator[av.VideoFrame, None, None]:
110
+ return self._create_generator_static(self._container, self._stream)
111
+
112
+ def _seek(self, frame_idx):
113
+ # By closing the generator, we exit the PyAV container.decode() context before seeking.
114
+ # This doesn't appear to be required, but seems like the safest thing to do.
115
+ self._generator.close()
116
+
117
+ pts_offset = frame_idx / self._stream.base_rate / self._stream.time_base
118
+ assert pts_offset == int(pts_offset) # verified at initialization
119
+ pts_offset = int(pts_offset)
120
+ self._container.seek(
121
+ offset=pts_offset, backward=True, any_frame=False, stream=self._stream
122
+ )
123
+ self._next_frame_idx = frame_idx
124
+
125
+ # We start a fresh container.decode() context after seeking. Otherwise, we have to deal with empty packets
126
+ # and old frames (especially with "AUTO" or "FRAME" threading). This also seems like the safest way to
127
+ # use the PyAV API.
128
+ self._generator = self._create_generator()
129
+
130
+ def _read(self):
131
+ for frame in self._generator:
132
+ # frame index = (ticks * seconds_per_tick) * fps
133
+ frame_idx = (frame.pts * frame.time_base) * self._stream.base_rate
134
+
135
+ if frame_idx != round(frame_idx):
136
+ raise ValueError(
137
+ f"Video frame index is not an integer ({float(frame_idx)}); check your video file"
138
+ )
139
+ frame_idx = round(frame_idx)
140
+
141
+ if frame_idx > self._next_frame_idx:
142
+ raise ValueError(f"Video file is missing frame {self._next_frame_idx}")
143
+
144
+ # might need to skip some frames after a seek
145
+ if frame_idx < self._next_frame_idx:
146
+ continue
147
+
148
+ # we've checked > and <, so all that remains is ==
149
+ assert frame_idx == self._next_frame_idx
150
+
151
+ self._next_frame_idx += 1
152
+ return frame
153
+
154
+ def __getitem__(self, frame_idx: int):
155
+ if not 0 <= frame_idx < len(self):
156
+ raise IndexError
157
+
158
+ # very valuable to not seek unless it's necessary,
159
+ if frame_idx != self._next_frame_idx:
160
+ self._seek(frame_idx)
161
+
162
+ return self._read()
163
+
164
+
165
+ class Video(Sequence[np.ndarray]):
166
+ """
167
+ Provides sequential and random access to video frames. The frames are returned as NumPy arrays.
168
+
169
+ Example usage:
170
+
171
+ ```python
172
+ with AVVideo(path) as video:
173
+ print(len(video)) # total number of frames
174
+ print(frame.shape) # e.g. (1080, 1920, 3)
175
+ print(frame.dtype) # e.g. np.uint8
176
+ frame = video[0] # first frame
177
+ frame = video[12] # frame 12
178
+ frame = video[len(video) - 1] # last frame
179
+
180
+ for frame in video: # sequential iteration
181
+ pass
182
+ ```
183
+
184
+ Videos with variable frame rates or inconsistent timing metadata may raise errors. This is
185
+ intentional so such cases can be inspected and future support evaluated.
186
+
187
+ Sequential access is generally faster than random access because random access may
188
+ require seeking and decoding intermediate frames that are ultimately discarded.
189
+
190
+ Thread type "AUTO" is generally faster for sequential access, but for random access it may be worse.
191
+
192
+ Video files that are I-frame encoded are generally faster at random access.
193
+ """
194
+
195
+ def __init__(
196
+ self,
197
+ path,
198
+ format="rgb24",
199
+ width=None,
200
+ height=None,
201
+ thread_type="SLICE",
202
+ thread_count=0,
203
+ ):
204
+ """
205
+ Initialize Video
206
+
207
+ :param path: path to video file
208
+ :param format: format when converting to numpy array (default rgb24, which is 8 bits per channel)
209
+ see https://pyav.basswood-io.com/docs/stable/api/video.html#av.video.format.VideoFormat
210
+ :param width: output width (None for same as video)
211
+ :param height: output height (None for same as video)
212
+ :param thread_type: thread type argument to pyav stream, must be 'SLICE' or 'FRAME', or 'AUTO'
213
+ :param thread_count: thread count argument to pyav stream
214
+ """
215
+
216
+ self._av_video = AVVideo(
217
+ path, thread_type=thread_type, thread_count=thread_count
218
+ )
219
+ self._format = format
220
+ self._width = width
221
+ self._height = height
222
+
223
+ def close(self):
224
+ self._av_video.close()
225
+
226
+ def __enter__(self):
227
+ return self
228
+
229
+ def __exit__(self, exc_type, exc_val, exc_tb):
230
+ self.close()
231
+
232
+ def __len__(self):
233
+ return len(self._av_video)
234
+
235
+ def __getitem__(self, item) -> np.ndarray:
236
+ frame = self._av_video[item]
237
+ return frame.to_ndarray(
238
+ format=self._format, width=self._width, height=self._height
239
+ )
File without changes