mvid 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mvid-0.1.0/PKG-INFO +53 -0
- mvid-0.1.0/README.md +42 -0
- mvid-0.1.0/pyproject.toml +25 -0
- mvid-0.1.0/src/mvid/__init__.py +239 -0
- mvid-0.1.0/src/mvid/py.typed +0 -0
mvid-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: mvid
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Simple video reading and writing
|
|
5
|
+
Author: Adam Alcolado
|
|
6
|
+
Author-email: Adam Alcolado <adam.alcolado@mtl.ai>
|
|
7
|
+
Requires-Dist: av
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Python: >=3.13
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# mvid
|
|
13
|
+
mvid is a simple library to treat video as a sequence (e.g. as a list) of NumPy arrays.
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from mvid import Video
|
|
17
|
+
|
|
18
|
+
with Video("myvideo.mp4") as video:
|
|
19
|
+
# get the number of frames
|
|
20
|
+
print(len(video))
|
|
21
|
+
|
|
22
|
+
# random access
|
|
23
|
+
frame = video[57]
|
|
24
|
+
|
|
25
|
+
# iterate over all frames in the video
|
|
26
|
+
for frame in video:
|
|
27
|
+
pass
|
|
28
|
+
```
|
|
29
|
+
It is built on top of PyAV (with minimal to no overhead) and abstracts away seeking and timing logic.
|
|
30
|
+
|
|
31
|
+
# Requirements
|
|
32
|
+
Requires [PyAV](https://pyav.basswood-io.com/docs/stable/) and NumPy.
|
|
33
|
+
|
|
34
|
+
# How it works
|
|
35
|
+
Frame lookup is based on decoding from the nearest preceding keyframe up to the requested index.
|
|
36
|
+
We determine that index using each frame’s timestamp together with the stream’s frame rate.
|
|
37
|
+
This approach works well for videos with consistent timing metadata, but not all files follow those assumptions.
|
|
38
|
+
Some containers use variable frame rates or contain incomplete or inconsistent timestamps. In those cases
|
|
39
|
+
there is no reliable way to infer a stable frame index without first scanning every frame and assigning
|
|
40
|
+
indices explicitly. Rather than performing that preprocessing step, we intentionally crash when encountering
|
|
41
|
+
timing metadata that cannot be interpreted unambiguously.
|
|
42
|
+
|
|
43
|
+
# Performance
|
|
44
|
+
Generally speaking, sequential access is as fast as possible thanks to PyAV. Check `benchmark.py` and compare
|
|
45
|
+
with `ffmpeg -i <my_video> -f null -`. The benchmarking script will also try random access and various
|
|
46
|
+
thread parameters so you can see what performance to expect.
|
|
47
|
+
|
|
48
|
+
There is overhead from conversion to NumPy arrays. We also provide a more "raw" AVVideo class that
|
|
49
|
+
performs all the bookkeeping without NumPy conversion.
|
|
50
|
+
|
|
51
|
+
# Related projects
|
|
52
|
+
[torchcodec](https://github.com/meta-pytorch/torchcodec) is a more heavy-duty library that returns PyTorch tensors.
|
|
53
|
+
It also has index-based access (among other options). It requires managing your installation of ffmpeg.
|
mvid-0.1.0/README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# mvid
|
|
2
|
+
mvid is a simple library to treat video as a sequence (e.g. as a list) of NumPy arrays.
|
|
3
|
+
|
|
4
|
+
```python
|
|
5
|
+
from mvid import Video
|
|
6
|
+
|
|
7
|
+
with Video("myvideo.mp4") as video:
|
|
8
|
+
# get the number of frames
|
|
9
|
+
print(len(video))
|
|
10
|
+
|
|
11
|
+
# random access
|
|
12
|
+
frame = video[57]
|
|
13
|
+
|
|
14
|
+
# iterate over all frames in the video
|
|
15
|
+
for frame in video:
|
|
16
|
+
pass
|
|
17
|
+
```
|
|
18
|
+
It is built on top of PyAV (with minimal to no overhead) and abstracts away seeking and timing logic.
|
|
19
|
+
|
|
20
|
+
# Requirements
|
|
21
|
+
Requires [PyAV](https://pyav.basswood-io.com/docs/stable/) and NumPy.
|
|
22
|
+
|
|
23
|
+
# How it works
|
|
24
|
+
Frame lookup is based on decoding from the nearest preceding keyframe up to the requested index.
|
|
25
|
+
We determine that index using each frame’s timestamp together with the stream’s frame rate.
|
|
26
|
+
This approach works well for videos with consistent timing metadata, but not all files follow those assumptions.
|
|
27
|
+
Some containers use variable frame rates or contain incomplete or inconsistent timestamps. In those cases
|
|
28
|
+
there is no reliable way to infer a stable frame index without first scanning every frame and assigning
|
|
29
|
+
indices explicitly. Rather than performing that preprocessing step, we intentionally crash when encountering
|
|
30
|
+
timing metadata that cannot be interpreted unambiguously.
|
|
31
|
+
|
|
32
|
+
# Performance
|
|
33
|
+
Generally speaking, sequential access is as fast as possible thanks to PyAV. Check `benchmark.py` and compare
|
|
34
|
+
with `ffmpeg -i <my_video> -f null -`. The benchmarking script will also try random access and various
|
|
35
|
+
thread parameters so you can see what performance to expect.
|
|
36
|
+
|
|
37
|
+
There is overhead from conversion to NumPy arrays. We also provide a more "raw" AVVideo class that
|
|
38
|
+
performs all the bookkeeping without NumPy conversion.
|
|
39
|
+
|
|
40
|
+
# Related projects
|
|
41
|
+
[torchcodec](https://github.com/meta-pytorch/torchcodec) is a more heavy-duty library that returns PyTorch tensors.
|
|
42
|
+
It also has index-based access (among other options). It requires managing your installation of ffmpeg.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mvid"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Simple video reading and writing"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Adam Alcolado", email = "adam.alcolado@mtl.ai" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.13"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"av",
|
|
12
|
+
"numpy"
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[dependency-groups]
|
|
16
|
+
dev = [
|
|
17
|
+
"pytest>=9.0.2",
|
|
18
|
+
"ruff>=0.15.0",
|
|
19
|
+
"tqdm",
|
|
20
|
+
"pillow"
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[build-system]
|
|
24
|
+
requires = ["uv_build>=0.9.7,<0.10.0"]
|
|
25
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
from typing import Generator, Sequence
|
|
2
|
+
|
|
3
|
+
import av
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AVVideo(Sequence[av.VideoFrame]):
    """
    This is the "raw" PyAV version of the Video class. It returns PyAV Frame objects.

    See Video docs for more information about usage.

    This class takes care of all the necessary seeking and bookkeeping.

    The main idea is to seek to the nearest keyframe and decode all the frames until we reach the target frame index.
    In the case that we will simply access the next frame, we hold on to the container.decode() context in a generator
    so that we don't need to seek and repeat decoding packets.
    """

    def __init__(
        self,
        path,
        video_stream_id=0,
        thread_type="SLICE",
        thread_count=0,
    ):
        """
        Initialize AVVideo class.

        :param path: path to video file
        :param video_stream_id: id of video in container (i.e. 0 is the first video stream)
        :param thread_type: 'SLICE' or 'FRAME', or 'AUTO'
            see https://pyav.basswood-io.com/docs/develop/api/codec.html#av.codec.context.ThreadType,
            and https://pyav.basswood-io.com/docs/stable/cookbook/basics.html#threading
        :param thread_count: number of threads to use (0 is auto)
        :raises ValueError: if thread_type is invalid, or if the stream's timing
            metadata violates the assumptions checked by _verify_timing

        The best thread type to use depends on the way the video is encoded and your access pattern.
        """

        if thread_type not in ("SLICE", "FRAME", "AUTO"):
            raise ValueError(
                f"thread_type '{thread_type}' is not 'SLICE', 'FRAME', or 'AUTO'"
            )

        container: av.container.InputContainer = av.open(path)
        try:
            stream: av.video.stream.VideoStream = container.streams.video[
                video_stream_id
            ]
            stream.thread_type = thread_type
            stream.thread_count = thread_count

            # Fail fast on files whose timing metadata we cannot interpret.
            AVVideo._verify_timing(stream)
        except Exception:
            # If initialization fails, the caller never receives an object to
            # call close() on, so the container must be closed here to avoid
            # leaking the open file/decoder resources.
            container.close()
            raise

        self._container = container
        self._stream = stream
        self._next_frame_idx = 0
        self._generator = self._create_generator()

    @staticmethod
    def _verify_timing(stream):
        """
        Verify that the stream metadata satisfies our assumptions about timing
        see https://pyav.basswood-io.com/docs/stable/api/time.html

        :raises ValueError: if any timing assumption is violated
        """

        if stream.start_time != 0:
            raise ValueError("Video stream starts at an offset")

        if stream.frames == 0:
            raise ValueError("Unknown number of frames in the video file")

        # The stream time_base gives the number of seconds per 'tick'.
        # Each frame has presentation time stamp (PTS) which counts in ticks.
        # The stream base_rate should give the frames per second (FPS) of the video
        # (perhaps guessed_rate would be a good choice to use instead).
        # If we calculate how many ticks are in a frame, this should be an integer.
        # 1 / ticks_per_frame = frames_per_second * seconds_per_tick
        ticks_per_frame = 1 / (stream.base_rate * stream.time_base)
        if ticks_per_frame.denominator != 1:
            raise ValueError(
                f"Ticks per frame ({float(ticks_per_frame)}) is not an integer for this video stream; check your file's timing metadata"
            )

        # duration in seconds == number of frames / fps
        if stream.duration * stream.time_base != stream.frames / stream.base_rate:
            raise ValueError(
                "Duration of the video file in seconds is inconsistent with the number of frames; check your file's timing metadata"
            )

    def close(self):
        """Exit the decode context and release the underlying container."""
        self._generator.close()
        self._container.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __len__(self):
        # Trustworthy because _verify_timing rejected streams with frames == 0.
        return self._stream.frames

    @staticmethod
    def _create_generator_static(
        container, stream
    ) -> Generator[av.VideoFrame, None, None]:
        # This method is static to avoid circular references which can hog resources.
        for frame in container.decode(stream.index):
            yield frame

    def _create_generator(self) -> Generator[av.VideoFrame, None, None]:
        return self._create_generator_static(self._container, self._stream)

    def _seek(self, frame_idx):
        """Seek to the keyframe at or before frame_idx and restart decoding."""
        # By closing the generator, we exit the PyAV container.decode() context before seeking.
        # This doesn't appear to be required, but seems like the safest thing to do.
        self._generator.close()

        # ticks = frames / (frames per second) / (seconds per tick)
        pts_offset = frame_idx / self._stream.base_rate / self._stream.time_base
        assert pts_offset == int(pts_offset)  # verified at initialization
        pts_offset = int(pts_offset)
        self._container.seek(
            offset=pts_offset, backward=True, any_frame=False, stream=self._stream
        )
        self._next_frame_idx = frame_idx

        # We start a fresh container.decode() context after seeking. Otherwise, we have to deal with empty packets
        # and old frames (especially with "AUTO" or "FRAME" threading). This also seems like the safest way to
        # use the PyAV API.
        self._generator = self._create_generator()

    def _read(self):
        """
        Decode frames until the one with index self._next_frame_idx and return it.

        :raises ValueError: if the stream skips, mislabels, or runs out of frames
        """
        for frame in self._generator:
            # frame index = (ticks * seconds_per_tick) * fps
            frame_idx = (frame.pts * frame.time_base) * self._stream.base_rate

            if frame_idx != round(frame_idx):
                raise ValueError(
                    f"Video frame index is not an integer ({float(frame_idx)}); check your video file"
                )
            frame_idx = round(frame_idx)

            if frame_idx > self._next_frame_idx:
                raise ValueError(f"Video file is missing frame {self._next_frame_idx}")

            # might need to skip some frames after a seek
            if frame_idx < self._next_frame_idx:
                continue

            # we've checked > and <, so all that remains is ==
            assert frame_idx == self._next_frame_idx

            self._next_frame_idx += 1
            return frame

        # Previously this fell off the end of the loop and implicitly returned
        # None when the decoder ran out of frames. Fail loudly instead, so a
        # truncated or mislabeled file is not silently read as a None frame.
        raise ValueError(
            f"Video stream ended before frame {self._next_frame_idx} could be decoded"
        )

    def __getitem__(self, frame_idx: int):
        # Support negative indices like other sequences (video[-1] is the
        # last frame); previously these raised IndexError unconditionally.
        if frame_idx < 0:
            frame_idx += len(self)
        if not 0 <= frame_idx < len(self):
            raise IndexError

        # very valuable to not seek unless it's necessary,
        if frame_idx != self._next_frame_idx:
            self._seek(frame_idx)

        return self._read()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class Video(Sequence[np.ndarray]):
    """
    Provides sequential and random access to video frames. The frames are returned as NumPy arrays.

    Example usage:

    ```python
    with Video(path) as video:
        print(len(video))  # total number of frames
        frame = video[0]  # first frame
        print(frame.shape)  # e.g. (1080, 1920, 3)
        print(frame.dtype)  # e.g. np.uint8
        frame = video[12]  # frame 12
        frame = video[len(video) - 1]  # last frame

        for frame in video:  # sequential iteration
            pass
    ```

    Videos with variable frame rates or inconsistent timing metadata may raise errors. This is
    intentional so such cases can be inspected and future support evaluated.

    Sequential access is generally faster than random access because random access may
    require seeking and decoding intermediate frames that are ultimately discarded.

    Thread type "AUTO" is generally faster for sequential access, but for random access it may be worse.

    Video files that are I-frame encoded are generally faster at random access.
    """

    def __init__(
        self,
        path,
        format="rgb24",
        width=None,
        height=None,
        thread_type="SLICE",
        thread_count=0,
    ):
        """
        Initialize Video

        :param path: path to video file
        :param format: format when converting to numpy array (default rgb24, which is 8 bits per channel)
            see https://pyav.basswood-io.com/docs/stable/api/video.html#av.video.format.VideoFormat
        :param width: output width (None for same as video)
        :param height: output height (None for same as video)
        :param thread_type: thread type argument to pyav stream, must be 'SLICE' or 'FRAME', or 'AUTO'
        :param thread_count: thread count argument to pyav stream
        """

        # All seeking and frame-index bookkeeping is delegated to AVVideo;
        # this class only adds conversion of decoded frames to NumPy arrays.
        self._av_video = AVVideo(
            path, thread_type=thread_type, thread_count=thread_count
        )
        self._format = format
        self._width = width
        self._height = height

    def close(self):
        # Closes the underlying AVVideo (and its container); the object must
        # not be used after this.
        self._av_video.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __len__(self):
        # Total number of frames, as reported by the underlying stream metadata.
        return len(self._av_video)

    def __getitem__(self, item) -> np.ndarray:
        # Fetch the decoded PyAV frame, then convert it (optionally rescaled
        # to width/height) into a NumPy array in the requested pixel format.
        frame = self._av_video[item]
        return frame.to_ndarray(
            format=self._format, width=self._width, height=self._height
        )
|
|
File without changes
|