parallel-video-io 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Sibo Wang-Chen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: parallel-video-io
3
+ Version: 0.1.3
4
+ Summary: Tools for reading and writing videos, and loading them efficiently with PyTorch.
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Author: Sibo Wang-Chen
8
+ Author-email: sibo.wang@epfl.ch
9
+ Requires-Python: >=3.10,<=3.13
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: imageio (>=2.37.0,<3)
17
+ Requires-Dist: imageio-ffmpeg (==0.6.0)
18
+ Requires-Dist: joblib (>=1.5.1,<2)
19
+ Requires-Dist: numpy (>=2.0,<3)
20
+ Requires-Dist: pytest (>=8.4.2,<9)
21
+ Requires-Dist: torch (>=2.8,<3)
22
+ Requires-Dist: torchcodec (==0.7.0)
23
+ Requires-Dist: tqdm (>=4.67,<5)
24
+ Project-URL: Repository, https://github.com/sibocw/parallel-video-io
25
+ Description-Content-Type: text/markdown
26
+
27
+ # parallel-video-io
28
+
29
+ Tools for reading and writing videos and for efficient frame-level loading with PyTorch.
30
+
31
+ This repository provides small, focused utilities around video I/O and a PyTorch-friendly iterable dataset + dataloader that make it easy to stream frames from many videos or directories of image frames in parallel.
32
+
33
+ ## Key features
34
+ - Read frames from videos (random access or sequential) using imageio/ffmpeg.
35
+ - Write sequences of numpy frames to H.264 MP4 files with sane defaults.
36
+ - PyTorch-compatible `VideoCollectionDataset` and `VideoCollectionDataLoader` that provide a simple iterator that uses multiple processes to load data from different videos under the hood. This is especially handy for running trained deep learning models on many videos in production.
37
+
38
+ ## Table of contents
39
+ - [Installation](#installation)
40
+ - [Quick examples](#quick-examples)
41
+ - [Reading video metadata](#reading-video-metadata)
42
+ - [Reading video frames](#reading-video-frames)
43
+ - [Writing a video](#writing-a-video)
44
+ - [Using the PyTorch dataset and dataloader](#using-the-pytorch-dataset-and-dataloader)
45
+ - [Testing](#testing)
46
+ - [Notes & troubleshooting](#notes--troubleshooting)
47
+
48
+ ## Installation
49
+
50
+ This project targets Python >= 3.10. The library's runtime dependencies are listed in `pyproject.toml` (torch, imageio, imageio-ffmpeg, torchcodec, joblib, tqdm, numpy, pytest).
51
+
52
+ If you're using pip in a development environment, install editable with:
53
+
54
+ ```bash
55
+ pip install -e .
56
+ ```
57
+
58
+ Or with Poetry:
59
+
60
+ ```bash
61
+ poetry install
62
+ ```
63
+
64
+ You can include this package as a dependency for your project by including the following in your `pyproject.toml`:
65
+
66
+ ```toml
67
+ [project]
68
+ # ... other stuff
69
+ dependencies = [
70
+ # ...
71
+ "parallel-video-io @ git+https://github.com/sibocw/parallel-video-io.git",
72
+ ]
73
+ ```
74
+
75
+ Make sure `ffmpeg` is available on your `$PATH` (required by imageio-ffmpeg).
76
+
77
+ ## Quick examples
78
+
79
+ These examples use NumPy arrays for frames in (height, width, channels) order and uint8 dtype.
80
+
81
+ ### Reading video metadata
82
+
83
+ ```python
84
+ from pvio.video_io import get_video_metadata, check_num_frames
85
+
86
+ # To get the number of frames in a video
87
+ n_frames = check_num_frames("example.mp4")
88
+ print(n_frames) # this is an integer frame count
89
+
90
+ # To get more information
91
+ # Note that this function actually caches this information in a JSON file. To control
92
+ # whether you want to save the cache file or disregard existing cache files, set the
93
+ # `cache_metadata` (default True) and `use_cached_metadata` (default True) arguments.
94
+ meta = get_video_metadata("example.mp4")
95
+ print(meta) # meta is a dictionary containing the keys "n_frames", "frame_size", "fps"
96
+ ```
97
+
98
+ ### Reading video frames
99
+
100
+ ```python
101
+ from pvio.video_io import read_frames_from_video
102
+
103
+ # You can read a whole video
104
+ frames, fps = read_frames_from_video("example.mp4")
105
+
106
+ # ... or just some frames
107
+ frames, fps = read_frames_from_video("example.mp4", frame_indices=[0, 5])
108
+ ```
109
+
110
+ ### Writing a video
111
+
112
+ ```python
113
+ import numpy as np
114
+ from pvio.video_io import write_frames_to_video
115
+
116
+ # Create dummy 32x32 RGB frames (H, W, C)
117
+ frames = [np.full((32, 32, 3), fill_value=i, dtype=np.uint8) for i in range(10)]
118
+
119
+ # Save them to file
120
+ # There are more complex video writing parameters that can be tuned - see the docstring
121
+ # for details.
122
+ write_frames_to_video("example.mp4", frames, fps=25.0)
123
+ ```
124
+
125
+ Notes: the writer verifies that all frames share the same (height, width). FFmpeg can
126
+ automatically resize frames to meet codec alignment requirements; for deterministic
127
+ results, use dimensions divisible by 16.
128
+
129
+ ### Using the PyTorch dataset and dataloader
130
+
131
+ The `VideoCollectionDataset` iterates frames either from video files or from directories containing individual image frames.
132
+
133
+ ```python
134
+ from pvio.torch import VideoCollectionDataset, VideoCollectionDataLoader
135
+
136
+ # Initialize Dataset from video files
137
+ paths = ["/path/to/video1.mp4", "/path/to/video2.mp4"]
138
+ ds = VideoCollectionDataset(paths)
139
+ # ... or from directories containing individual frames as images
140
+ paths = ["/path/to/frames_dir1", "/path/to/frames_dir2"]
141
+ # To control sorting of frame files within each dir, use the `frame_sorting` argument
142
+ # (see docstring for details)
143
+ ds = VideoCollectionDataset(paths, as_image_dirs=True)
144
+
145
+ # Wrap in the special DataLoader
146
+ # (you can add other DataLoader keyword arguments if you wish)
147
+ loader = VideoCollectionDataLoader(ds, batch_size=8, num_workers=4)
148
+
149
+ # Now you can iterate over all frames from all videos in a single iterator. Behind the
150
+ # scenes, these frames are fetched in parallel (each worker handles one video at a time)
151
+ for batch in loader:
152
+ frames = batch["frames"] # torch.Tensor: B x C x H x W
153
+ video_paths = batch["video_paths"] # list of pathlib.Path (input paths are converted to Path)
154
+ frame_indices = batch["frame_indices"] # list of int
155
+ ```
156
+
157
+ When loading from video files (as_image_dirs=False), the dataset uses `torchcodec`'s `VideoDecoder` to decode frames and `get_video_metadata` to build per-video frame counts; you may want to enable caching if you index many large files.
158
+
159
+ ## Testing
160
+
161
+ The test suite uses pytest. Run it from the repository root:
162
+
163
+ ```bash
164
+ pytest tests
165
+ ```
166
+
167
+ There are a few tests that write small MP4 files using imageio/ffmpeg; ensure `ffmpeg` is available in the environment where tests run.
168
+
169
+ ## Notes & troubleshooting
170
+
171
+ - FFmpeg macroblock constraints: some ffmpeg builds require frame dimensions to be divisible by 16. If you see a warning about `macro_block_size=16` and unexpected resizing, choose frame sizes divisible by 16 in production pipelines.
172
+ - If you plan to decode many large videos, enabling metadata caching (the package writes a `.metadata.json` next to each video when `get_video_metadata` is called) will speed up repeated indexing.
173
+ - The PyTorch loader expects the dataset passed to `VideoCollectionDataLoader` to be an instance of `VideoCollectionDataset` and enforces the built-in collate function.
174
+
@@ -0,0 +1,147 @@
1
+ # parallel-video-io
2
+
3
+ Tools for reading and writing videos and for efficient frame-level loading with PyTorch.
4
+
5
+ This repository provides small, focused utilities around video I/O and a PyTorch-friendly iterable dataset + dataloader that make it easy to stream frames from many videos or directories of image frames in parallel.
6
+
7
+ ## Key features
8
+ - Read frames from videos (random access or sequential) using imageio/ffmpeg.
9
+ - Write sequences of numpy frames to H.264 MP4 files with sane defaults.
10
+ - PyTorch-compatible `VideoCollectionDataset` and `VideoCollectionDataLoader` that provide a simple iterator that uses multiple processes to load data from different videos under the hood. This is especially handy for running trained deep learning models on many videos in production.
11
+
12
+ ## Table of contents
13
+ - [Installation](#installation)
14
+ - [Quick examples](#quick-examples)
15
+ - [Reading video metadata](#reading-video-metadata)
16
+ - [Reading video frames](#reading-video-frames)
17
+ - [Writing a video](#writing-a-video)
18
+ - [Using the PyTorch dataset and dataloader](#using-the-pytorch-dataset-and-dataloader)
19
+ - [Testing](#testing)
20
+ - [Notes & troubleshooting](#notes--troubleshooting)
21
+
22
+ ## Installation
23
+
24
+ This project targets Python >= 3.10. The library's runtime dependencies are listed in `pyproject.toml` (torch, imageio, imageio-ffmpeg, torchcodec, joblib, tqdm, numpy, pytest).
25
+
26
+ If you're using pip in a development environment, install editable with:
27
+
28
+ ```bash
29
+ pip install -e .
30
+ ```
31
+
32
+ Or with Poetry:
33
+
34
+ ```bash
35
+ poetry install
36
+ ```
37
+
38
+ You can include this package as a dependency for your project by including the following in your `pyproject.toml`:
39
+
40
+ ```toml
41
+ [project]
42
+ # ... other stuff
43
+ dependencies = [
44
+ # ...
45
+ "parallel-video-io @ git+https://github.com/sibocw/parallel-video-io.git",
46
+ ]
47
+ ```
48
+
49
+ Make sure `ffmpeg` is available on your `$PATH` (required by imageio-ffmpeg).
50
+
51
+ ## Quick examples
52
+
53
+ These examples use NumPy arrays for frames in (height, width, channels) order and uint8 dtype.
54
+
55
+ ### Reading video metadata
56
+
57
+ ```python
58
+ from pvio.video_io import get_video_metadata, check_num_frames
59
+
60
+ # To get the number of frames in a video
61
+ n_frames = check_num_frames("example.mp4")
62
+ print(n_frames) # this is an integer frame count
63
+
64
+ # To get more information
65
+ # Note that this function actually caches this information in a JSON file. To control
66
+ # whether you want to save the cache file or disregard existing cache files, set the
67
+ # `cache_metadata` (default True) and `use_cached_metadata` (default True) arguments.
68
+ meta = get_video_metadata("example.mp4")
69
+ print(meta) # meta is a dictionary containing the keys "n_frames", "frame_size", "fps"
70
+ ```
71
+
72
+ ### Reading video frames
73
+
74
+ ```python
75
+ from pvio.video_io import read_frames_from_video
76
+
77
+ # You can read a whole video
78
+ frames, fps = read_frames_from_video("example.mp4")
79
+
80
+ # ... or just some frames
81
+ frames, fps = read_frames_from_video("example.mp4", frame_indices=[0, 5])
82
+ ```
83
+
84
+ ### Writing a video
85
+
86
+ ```python
87
+ import numpy as np
88
+ from pvio.video_io import write_frames_to_video
89
+
90
+ # Create dummy 32x32 RGB frames (H, W, C)
91
+ frames = [np.full((32, 32, 3), fill_value=i, dtype=np.uint8) for i in range(10)]
92
+
93
+ # Save them to file
94
+ # There are more complex video writing parameters that can be tuned - see the docstring
95
+ # for details.
96
+ write_frames_to_video("example.mp4", frames, fps=25.0)
97
+ ```
98
+
99
+ Notes: the writer verifies that all frames share the same (height, width). FFmpeg can
100
+ automatically resize frames to meet codec alignment requirements; for deterministic
101
+ results, use dimensions divisible by 16.
102
+
103
+ ### Using the PyTorch dataset and dataloader
104
+
105
+ The `VideoCollectionDataset` iterates frames either from video files or from directories containing individual image frames.
106
+
107
+ ```python
108
+ from pvio.torch import VideoCollectionDataset, VideoCollectionDataLoader
109
+
110
+ # Initialize Dataset from video files
111
+ paths = ["/path/to/video1.mp4", "/path/to/video2.mp4"]
112
+ ds = VideoCollectionDataset(paths)
113
+ # ... or from directories containing individual frames as images
114
+ paths = ["/path/to/frames_dir1", "/path/to/frames_dir2"]
115
+ # To control sorting of frame files within each dir, use the `frame_sorting` argument
116
+ # (see docstring for details)
117
+ ds = VideoCollectionDataset(paths, as_image_dirs=True)
118
+
119
+ # Wrap in the special DataLoader
120
+ # (you can add other DataLoader keyword arguments if you wish)
121
+ loader = VideoCollectionDataLoader(ds, batch_size=8, num_workers=4)
122
+
123
+ # Now you can iterate over all frames from all videos in a single iterator. Behind the
124
+ # scenes, these frames are fetched in parallel (each worker handles one video at a time)
125
+ for batch in loader:
126
+ frames = batch["frames"] # torch.Tensor: B x C x H x W
127
+ video_paths = batch["video_paths"] # list of pathlib.Path (input paths are converted to Path)
128
+ frame_indices = batch["frame_indices"] # list of int
129
+ ```
130
+
131
+ When loading from video files (as_image_dirs=False), the dataset uses `torchcodec`'s `VideoDecoder` to decode frames and `get_video_metadata` to build per-video frame counts; you may want to enable caching if you index many large files.
132
+
133
+ ## Testing
134
+
135
+ The test suite uses pytest. Run it from the repository root:
136
+
137
+ ```bash
138
+ pytest tests
139
+ ```
140
+
141
+ There are a few tests that write small MP4 files using imageio/ffmpeg; ensure `ffmpeg` is available in the environment where tests run.
142
+
143
+ ## Notes & troubleshooting
144
+
145
+ - FFmpeg macroblock constraints: some ffmpeg builds require frame dimensions to be divisible by 16. If you see a warning about `macro_block_size=16` and unexpected resizing, choose frame sizes divisible by 16 in production pipelines.
146
+ - If you plan to decode many large videos, enabling metadata caching (the package writes a `.metadata.json` next to each video when `get_video_metadata` is called) will speed up repeated indexing.
147
+ - The PyTorch loader expects the dataset passed to `VideoCollectionDataLoader` to be an instance of `VideoCollectionDataset` and enforces the built-in collate function.
@@ -0,0 +1,31 @@
1
+ [project]
2
+ name = "parallel-video-io"
3
+ version = "0.1.3"
4
+ description = "Tools for reading and writing videos, and loading them efficiently with PyTorch."
5
+ authors = [
6
+ {name = "Sibo Wang-Chen", email = "sibo.wang@epfl.ch"}
7
+ ]
8
+ readme = "README.md"
9
+ license = {text = "MIT"}
10
+ requires-python = ">=3.10,<=3.13"
11
+ dependencies = [
12
+ "torch>=2.8,<3",
13
+ "imageio>=2.37.0,<3",
14
+ "imageio-ffmpeg==0.6.0",
15
+ "torchcodec==0.7.0",
16
+ "joblib>=1.5.1,<2",
17
+ "tqdm>=4.67,<5",
18
+ "numpy>=2.0,<3",
19
+ "pytest>=8.4.2,<9",
20
+ ]
21
+
22
+ [project.urls]
23
+ repository = "https://github.com/sibocw/parallel-video-io"
24
+
25
+ [tool.poetry]
26
+ packages = [{include = "pvio", from = "src"}]
27
+
28
+
29
+ [build-system]
30
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
31
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,7 @@
1
+ from .video_io import (
2
+ read_frames_from_video,
3
+ write_frames_to_video,
4
+ check_num_frames,
5
+ get_video_metadata,
6
+ )
7
+ from .torch import VideoCollectionDataset, VideoCollectionDataLoader
@@ -0,0 +1,220 @@
1
+ import torch
2
+ import logging
3
+ import re
4
+ import imageio.v2 as imageio
5
+ from typing import Callable
6
+ from torchcodec.decoders import VideoDecoder
7
+ from torch.utils.data import IterableDataset, DataLoader, get_worker_info
8
+ from pathlib import Path
9
+ from tqdm import tqdm
10
+ from joblib import Parallel, delayed
11
+
12
+ from .video_io import get_video_metadata
13
+ from .util import balance_load_lpt
14
+
15
+
16
+ class VideoCollectionDataset(IterableDataset):
17
+ """Yields individual frames from Spotlight behavior recordings, either
18
+ from videos or from image sequences."""
19
+
20
+ def __init__(
21
+ self,
22
+ paths: list[Path | str],
23
+ as_image_dirs: bool = False,
24
+ frame_sorting: None | str = None,
25
+ transform: Callable | None = None,
26
+ ):
27
+ r"""
28
+ Args:
29
+ paths (list[Path]): List of to video paths, or directories
30
+ containing frames as individual images.
31
+ as_image_dirs (bool): If True, treat each path as a directory
32
+ containing individual frames. Otherwise, treat it as a
33
+ video file.
34
+ frame_sorting (str | None): When `as_image_dirs` is True, this
35
+ argument specifies how images within each directory should
36
+ be sorted. If None, files are sorted by name. If given as a
37
+ string, it is used as a regex pattern to extract frame
38
+ numbers from filenames (e.g. r"frame\D*(\d+)(?!\d)").
39
+ When `as_image_dirs` is False, this argument is ignored.
40
+ transform (Callable | None): A function that is to be applied
41
+ to each frame after loading. Note that the following
42
+ operations are already applied to each frame:
43
+ (i) conversion from numpy array to torch tensor,
44
+ (ii) conversion from HWC to CHW format, and
45
+ (iii) conversion from uint8 in [0, 255] to float in [0, 1].
46
+ The transform function, if provided, is applied after these
47
+ operations.
48
+ """
49
+ self.video_paths = [Path(p) for p in paths]
50
+ self.worker_assignments = None
51
+ self.as_image_dirs = as_image_dirs
52
+ self.frame_sorting = frame_sorting
53
+ self.n_frames_lookup = None # Populated by assign_workers()
54
+ self.transform = transform
55
+
56
+ # Check if the paths are all valid
57
+ for p in self.video_paths:
58
+ if self.as_image_dirs:
59
+ if not p.is_dir():
60
+ raise ValueError(
61
+ f"One of the specified paths {p} is not a valid directory. "
62
+ "Directories containing individual frame images are expected."
63
+ )
64
+ else:
65
+ if not p.is_file():
66
+ raise ValueError(
67
+ f"One of the specified paths {p} is not a valid file. "
68
+ "Video files are expected."
69
+ )
70
+
71
+ # Sort images if we're loading from directories of images
72
+ self.frame_sortings = {}
73
+ regex = re.compile(frame_sorting) if frame_sorting else None
74
+ if as_image_dirs:
75
+ # Iterate over the canonical Path objects (self.video_paths) so we
76
+ # consistently store Path keys and avoid relying on caller types
77
+ for path in self.video_paths:
78
+ all_files = [f for f in path.iterdir() if f.is_file()]
79
+ if regex is None:
80
+ sorting_func = lambda f: f.name
81
+ else:
82
+ sorting_func = lambda f: self._extract_frame_number(f.name, regex)
83
+ # Store a new sorted list (list.sort() returns None)
84
+ self.frame_sortings[path] = sorted(all_files, key=sorting_func)
85
+
86
+ def assign_workers(
87
+ self, n_frame_loading_workers: int, n_metadata_indexing_workers: int = -1
88
+ ):
89
+ # Check how many frame loading workers we're actually using (e.g. -1 actually
90
+ # means all available cores, so we need to figure out how many that is)
91
+ n_frame_loading_workers_effective = Parallel(
92
+ n_jobs=n_frame_loading_workers
93
+ )._effective_n_jobs()
94
+ logging.info(
95
+ f"Caller specified {n_frame_loading_workers} workers for frame loading. "
96
+ f"This is effectively {n_frame_loading_workers_effective} workers."
97
+ )
98
+
99
+ # Figure out how many frames there are in each video. This allows us to split
100
+ # the workload more evenly among workers by the number of frames.
101
+ if self.as_image_dirs:
102
+ self.n_frames_lookup = {
103
+ path: len(frames) for path, frames in self.frame_sortings.items()
104
+ }
105
+ else:
106
+ # Count frames in videos. This requires partially decoding the video files
107
+ # and it can be quite slow, so we do it in parallel and use caches.
108
+ logging.info(
109
+ f"Loading metadata for {len(self.video_paths)} videos. "
110
+ "This may take a while if no cached metadata is available."
111
+ )
112
+ metas = Parallel(n_jobs=n_metadata_indexing_workers)(
113
+ delayed(get_video_metadata)(path)
114
+ for path in tqdm(self.video_paths, desc="Indexing videos", disable=None)
115
+ )
116
+ self.n_frames_lookup = {
117
+ path: meta["n_frames"] for path, meta in zip(self.video_paths, metas)
118
+ }
119
+
120
+ # Split videos evenly among the available workers
121
+ self.worker_assignments = balance_load_lpt(
122
+ self.n_frames_lookup, max(1, n_frame_loading_workers_effective)
123
+ )
124
+
125
+ def __iter__(self):
126
+ # Get worker info for distributed loading
127
+ worker_info = get_worker_info()
128
+ if worker_info is None:
129
+ # Single process
130
+ video_subset = self.video_paths
131
+ else:
132
+ # Split videos among workers
133
+ video_subset = self.worker_assignments[worker_info.id]
134
+
135
+ # Each worker sequentially decodes its assigned videos
136
+ for video_path in video_subset:
137
+ if self.as_image_dirs:
138
+ # Read individual images
139
+ frame_files = self.frame_sortings[video_path]
140
+ for frame_idx, frame_file in enumerate(frame_files):
141
+ frame = imageio.imread(frame_file)
142
+ frame = torch.from_numpy(frame)
143
+ if frame.ndim == 2:
144
+ frame = frame.unsqueeze(-1) # add channel dim
145
+ frame = frame.permute(2, 0, 1) # HWC to CHW
146
+ frame = frame.float() / 255.0 # to float in [0, 1]
147
+ if self.transform:
148
+ frame = self.transform(frame)
149
+ yield {
150
+ "frame": frame,
151
+ "video_path": video_path,
152
+ "frame_idx": frame_idx,
153
+ }
154
+ else:
155
+ # Use torchcodec to decode videos
156
+ decoder = VideoDecoder(video_path)
157
+ for frame_idx in range(len(decoder)):
158
+ frame = decoder[frame_idx] # returns tensor in CHW
159
+ frame = frame.float() / 255.0 # to float in [0, 1]
160
+ if self.transform:
161
+ frame = self.transform(frame)
162
+ yield {
163
+ "frame": frame,
164
+ "video_path": video_path,
165
+ "frame_idx": frame_idx,
166
+ }
167
+
168
+ def __len__(self):
169
+ if self.n_frames_lookup is None:
170
+ raise ValueError(
171
+ "VideoCollectionDataset length is unknown until workers are assigned. "
172
+ "Call `assign_workers()` before using `len()`."
173
+ )
174
+ return sum(self.n_frames_lookup.values())
175
+
176
+ @staticmethod
177
+ def _extract_frame_number(filename: str, regex_pattern: str) -> int:
178
+ matches = re.findall(regex_pattern, filename)
179
+ if len(matches) != 1:
180
+ raise ValueError(
181
+ f"{len(matches)} matches found in filename {filename} "
182
+ f"using regex pattern {regex_pattern}. Only one match is expected."
183
+ )
184
+ try:
185
+ return int(matches[0])
186
+ except ValueError as e:
187
+ raise ValueError(
188
+ f"Failed to parse '{matches[0]}' as int. This substring is extracted "
189
+ f"from filename {filename} using regex pattern {regex_pattern}."
190
+ ) from e
191
+
192
+
193
class VideoCollectionDataLoader(DataLoader):
    """DataLoader for `VideoCollectionDataset`.

    It always uses its own collate function and, once constructed, asks the
    dataset to distribute its videos over `num_workers` workers.
    """

    def __init__(self, dataset: VideoCollectionDataset, **kwargs):
        """
        Args:
            dataset (VideoCollectionDataset): The dataset to load from.
            **kwargs: Forwarded to `torch.utils.data.DataLoader`.
                `batch_sampler` and `collate_fn` must be absent or None.

        Raises:
            ValueError: If `dataset` is not a `VideoCollectionDataset`, or
                if a `batch_sampler` or `collate_fn` is supplied.
        """
        if not isinstance(dataset, VideoCollectionDataset):
            raise ValueError(
                "VideoCollectionDataLoader only works with VideoCollectionDataset."
            )

        # Options that callers are not allowed to override, checked in order
        rejected_options = (
            (
                "batch_sampler",
                "VideoCollectionDataLoader does not support custom batch samplers.",
            ),
            (
                "collate_fn",
                "VideoCollectionDataLoader must use the built-in collate function.",
            ),
        )
        for option, complaint in rejected_options:
            if kwargs.get(option) is not None:
                raise ValueError(complaint)

        loader_options = {**kwargs, "collate_fn": self._collate}
        super().__init__(dataset, **loader_options)

        # num_workers is only known for sure after DataLoader.__init__ has run
        self.dataset.assign_workers(n_frame_loading_workers=self.num_workers)

    @staticmethod
    def _collate(batch):
        """Turn a list of per-frame dicts into one dict for the whole batch."""
        stacked = torch.stack([sample["frame"] for sample in batch])
        paths = [sample["video_path"] for sample in batch]
        indices = [sample["frame_idx"] for sample in batch]
        return {"frames": stacked, "video_paths": paths, "frame_indices": indices}
@@ -0,0 +1,34 @@
1
+ import numpy as np
2
+ from typing import Hashable
3
+
4
+
5
def balance_load_lpt(
    tasks: dict[Hashable, int], n_workers: int
) -> list[list[Hashable]]:
    """The Longest Processing Time (LPT) algorithm for load balancing: sort
    tasks by decreasing duration and assign each task to the worker with
    the currently smallest total assigned load.

    Args:
        tasks (dict[Hashable, int]): A dict mapping task identifiers (can
            be any hashable type) to their estimated durations.
        n_workers (int): Number of workers to distribute tasks across.
            Must be at least 1.

    Returns:
        assignments (list[list[Hashable]]): A list of lists, where
            assignments[i] contains the IDs of tasks assigned to worker i.
            Workers that receive no task get an empty list.

    Raises:
        ValueError: If `n_workers` is smaller than 1.
    """
    # Fail early with a clear message instead of an opaque numpy error
    if n_workers < 1:
        raise ValueError(f"n_workers must be at least 1, got {n_workers}.")

    # Sort tasks by descending duration (ties keep their original order)
    sorted_tasks = sorted(tasks.items(), key=lambda x: x[1], reverse=True)

    # Initialize workers and their current loads
    worker_loads = np.zeros(n_workers)
    assignments = [[] for _ in range(n_workers)]

    # Assign each task to the currently least-loaded worker; on ties argmin
    # picks the worker with the lowest index
    for task_id, duration in sorted_tasks:
        i = int(np.argmin(worker_loads))
        assignments[i].append(task_id)
        worker_loads[i] += duration

    return assignments
@@ -0,0 +1,159 @@
1
+ import numpy as np
2
+ import json
3
+ import logging
4
+ import imageio.v2 as imageio
5
+ from pathlib import Path
6
+
7
+
8
def read_frames_from_video(
    video_path: Path | str, frame_indices: list[int] | None = None
) -> tuple[list[np.ndarray], float]:
    """Load selected frames (or every frame) of a video through imageio.

    Args:
        video_path (Path | str): Location of the video file.
        frame_indices (list[int] | None): Indices of the frames to load, in
            the order they should be returned. None means all frames, as
            counted by the reader.

    Returns:
        frames (list[np.ndarray]): The requested frames, one array each.
        fps (float): The "fps" entry of the reader's metadata, or None if
            the metadata has no such entry.

    Note:
        Errors raised by the imageio reader (e.g. for an unreadable file or
        an invalid frame index) are not caught here.
    """
    with imageio.get_reader(video_path) as reader:
        wanted = frame_indices
        if wanted is None:
            wanted = range(reader.count_frames())
        frames = [reader.get_data(idx) for idx in wanted]
        fps = reader.get_meta_data().get("fps", None)
    return frames, fps
34
+
35
+
36
+ _default_ffmpeg_params_for_video_writing = [
37
+ "-crf",
38
+ "15", # Lower CRF = higher quality (15 is very high quality)
39
+ "-preset",
40
+ "slow", # Slower preset = better compression efficiency
41
+ "-profile:v",
42
+ "high", # Use high profile for better compression
43
+ "-level",
44
+ "4.0", # H.264 level
45
+ ]
46
+
47
+
48
+ def write_frames_to_video(
49
+ video_path: Path | str,
50
+ frames: list[np.ndarray],
51
+ fps: float,
52
+ codec: str = "libx264",
53
+ ffmpeg_params: list[str] = _default_ffmpeg_params_for_video_writing,
54
+ log_interval: int | None = None,
55
+ log_level: int = logging.INFO,
56
+ ):
57
+ """Write a sequence of frames to a video file.
58
+
59
+ Args:
60
+ video_path (Path | str): Path to save the video file.
61
+ frames (list[np.ndarray]): List of frames as numpy arrays (in
62
+ [height, width, channels] format).
63
+ fps (float): Frames per second for the output video.
64
+ codec (str): Codec to use. Default: 'libx264'.
65
+ ffmpeg_params (list[str]): Additional ffmpeg parameters.
66
+ Default is a set of parameters for high-quality H.264 encoding.
67
+ (see _default_ffmpeg_params_for_video_writing).
68
+ log_interval (int | None): If set, log progress every
69
+ `log_interval` frames at the specified log level.
70
+ log_level (int): Logging level for progress. Default: logging.INFO.
71
+ """
72
+ # Check frame size consistency
73
+ if len(frames) == 0:
74
+ raise ValueError("No frames provided to write_frames_to_video")
75
+ frame_size = frames[0].shape[:2]
76
+ for frame in frames:
77
+ if frame.shape[:2] != frame_size:
78
+ raise ValueError(
79
+ "All frames must have the same dimensions. The 0th frame has size "
80
+ f"{frame_size}, but at least one frame has size {frame.shape[:2]}."
81
+ )
82
+
83
+ # Use imageio to write video with ffmpeg backend
84
+ with imageio.get_writer(
85
+ str(video_path),
86
+ "ffmpeg",
87
+ fps=fps,
88
+ codec=codec,
89
+ quality=None, # Use CRF (in ffmpeg_params) instead of quality
90
+ ffmpeg_params=ffmpeg_params,
91
+ ) as video_writer:
92
+ for i, frame in enumerate(frames):
93
+ video_writer.append_data(frame)
94
+
95
+ if log_interval is not None and i % log_interval == 0:
96
+ logging.log(log_level, f"Written frame {i + 1}/{len(frames)}")
97
+
98
+
99
+ def check_num_frames(video_path: Path | str) -> int:
100
+ """Check number of frames in a video file."""
101
+ try:
102
+ with imageio.get_reader(video_path) as reader:
103
+ num_frames = reader.count_frames()
104
+ except Exception as e:
105
+ raise RuntimeError(f"Failed to open video file: {video_path}") from e
106
+ return num_frames
107
+
108
+
109
+ def get_video_metadata(
110
+ video_path: Path | str,
111
+ cache_metadata: bool = True,
112
+ use_cached_metadata: bool = True,
113
+ metadata_suffix: str = ".metadata.json",
114
+ ):
115
+ """Get number of frames, frame size, and FPS of a video file.
116
+
117
+ Args:
118
+ video_path (Path | str): Path to the video file.
119
+ cache_metadata (bool): Whether to cache the metadata to a JSON
120
+ file. Default is True.
121
+ use_cached_metadata (bool): Whether to use cached metadata if
122
+ available. Default is True.
123
+ metadata_suffix (str): Suffix to use for the metadata cache file.
124
+ Default is ".metadata.json".
125
+
126
+ Returns:
127
+ dict: A dictionary containing the video metadata.
128
+ """
129
+ metadata = {}
130
+
131
+ video_path = Path(video_path)
132
+ cache_path = video_path.with_suffix(metadata_suffix)
133
+ if use_cached_metadata and cache_path.is_file():
134
+ try:
135
+ with open(cache_path, "r") as f:
136
+ metadata = json.load(f)
137
+ n_frames = metadata["n_frames"]
138
+ frame_size = tuple(metadata["frame_size"])
139
+ fps = metadata["fps"]
140
+ except Exception as e:
141
+ print(f"Corrupted metadata cache file {cache_path}")
142
+ raise e
143
+ else:
144
+ n_frames = check_num_frames(video_path)
145
+ sample_frames, fps = read_frames_from_video(
146
+ video_path, frame_indices=[0]
147
+ )
148
+ frame_size = sample_frames[0].shape[:2]
149
+
150
+ if cache_metadata:
151
+ metadata = {
152
+ "n_frames": n_frames,
153
+ "frame_size": list(frame_size),
154
+ "fps": fps,
155
+ }
156
+ with open(cache_path, "w") as f:
157
+ json.dump(metadata, f, indent=2)
158
+
159
+ return {"n_frames": n_frames, "frame_size": frame_size, "fps": fps}