parallel-video-io 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parallel_video_io-0.1.3/LICENSE +21 -0
- parallel_video_io-0.1.3/PKG-INFO +174 -0
- parallel_video_io-0.1.3/README.md +147 -0
- parallel_video_io-0.1.3/pyproject.toml +31 -0
- parallel_video_io-0.1.3/src/pvio/__init__.py +7 -0
- parallel_video_io-0.1.3/src/pvio/torch.py +220 -0
- parallel_video_io-0.1.3/src/pvio/util.py +34 -0
- parallel_video_io-0.1.3/src/pvio/video_io.py +159 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Sibo Wang-Chen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: parallel-video-io
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Tools for reading and writing videos, and loading them efficiently with PyTorch.
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Sibo Wang-Chen
|
|
8
|
+
Author-email: sibo.wang@epfl.ch
|
|
9
|
+
Requires-Python: >=3.10,<=3.13
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: imageio (>=2.37.0,<3)
|
|
17
|
+
Requires-Dist: imageio-ffmpeg (==0.6.0)
|
|
18
|
+
Requires-Dist: joblib (>=1.5.1,<2)
|
|
19
|
+
Requires-Dist: numpy (>=2.0,<3)
|
|
20
|
+
Requires-Dist: pytest (>=8.4.2,<9)
|
|
21
|
+
Requires-Dist: torch (>=2.8,<3)
|
|
22
|
+
Requires-Dist: torchcodec (==0.7.0)
|
|
23
|
+
Requires-Dist: tqdm (>=4.67,<5)
|
|
24
|
+
Project-URL: Repository, https://github.com/sibocw/parallel-video-io
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# parallel-video-io
|
|
28
|
+
|
|
29
|
+
Tools for reading and writing videos and for efficient frame-level loading with PyTorch.
|
|
30
|
+
|
|
31
|
+
This repository provides small, focused utilities around video I/O and a PyTorch-friendly iterable dataset + dataloader that make it easy to stream frames from many videos or directories of image frames in parallel.
|
|
32
|
+
|
|
33
|
+
## Key features
|
|
34
|
+
- Read frames from videos (random access or sequential) using imageio/ffmpeg.
|
|
35
|
+
- Write sequences of numpy frames to H.264 MP4 files with sane defaults.
|
|
36
|
+
- PyTorch-compatible `VideoCollectionDataset` and `VideoCollectionDataLoader` that provide a simple iterator that uses multiple processes to load data from different videos under the hood. This is especially handy for running trained deep learning models on many videos in production.
|
|
37
|
+
|
|
38
|
+
## Table of contents
|
|
39
|
+
- [Installation](#installation)
|
|
40
|
+
- [Quick examples](#quick-examples)
|
|
41
|
+
- [Reading video metadata](#reading-video-metadata)
|
|
42
|
+
- [Reading video frames](#reading-video-frames)
|
|
43
|
+
- [Writing a video](#writing-a-video)
|
|
44
|
+
- [Using the PyTorch dataset and dataloader](#using-the-pytorch-dataset-and-dataloader)
|
|
45
|
+
- [Testing](#testing)
|
|
46
|
+
- [Notes & troubleshooting](#notes--troubleshooting)
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
This project targets Python >= 3.10. The library's runtime dependencies are listed in `pyproject.toml` (torch, imageio, imageio-ffmpeg, torchcodec, joblib, tqdm, numpy, pytest).
|
|
51
|
+
|
|
52
|
+
If you're using pip in a development environment, install editable with:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install -e .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Or with Poetry:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
poetry install
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
You can include this package as a dependency for your project by including the following in your `pyproject.toml`:
|
|
65
|
+
|
|
66
|
+
```toml
|
|
67
|
+
[project]
|
|
68
|
+
# ... other stuff
|
|
69
|
+
dependencies = [
|
|
70
|
+
# ...
|
|
71
|
+
"parallel-video-io @ git+https://github.com/sibocw/parallel-video-io.git",
|
|
72
|
+
]
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Make sure `ffmpeg` is available on your `$PATH` (required by imageio-ffmpeg).
|
|
76
|
+
|
|
77
|
+
## Quick examples
|
|
78
|
+
|
|
79
|
+
These examples use NumPy arrays for frames in (height, width, channels) order and uint8 dtype.
|
|
80
|
+
|
|
81
|
+
### Reading video metadata
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from pvio.video_io import get_video_metadata, check_num_frames
|
|
85
|
+
|
|
86
|
+
# To get the number of frames in a video
|
|
87
|
+
n_frames = check_num_frames("example.mp4")
|
|
88
|
+
print(n_frames) # this is an integer frame count
|
|
89
|
+
|
|
90
|
+
# To get more information
|
|
91
|
+
# Note that this function caches this information in a JSON file. To control
|
|
92
|
+
# whether you want to save the cache file or disregard existing cache files, set the
|
|
93
|
+
# `cache_metadata` (default True) and `use_cached_metadata` (default True) arguments.
|
|
94
|
+
meta = get_video_metadata("example.mp4")
|
|
95
|
+
print(meta) # meta is a dictionary containing the keys "n_frames", "frame_size", "fps"
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Reading video frames
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from pvio.video_io import read_frames_from_video
|
|
102
|
+
|
|
103
|
+
# You can read a whole video
|
|
104
|
+
frames, fps = read_frames_from_video("example.mp4")
|
|
105
|
+
|
|
106
|
+
# ... or just some frames
|
|
107
|
+
frames, fps = read_frames_from_video("example.mp4", frame_indices=[0, 5])
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Writing a video
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
import numpy as np
|
|
114
|
+
from pvio.video_io import write_frames_to_video
|
|
115
|
+
|
|
116
|
+
# Create dummy 32x32 RGB frames (H, W, C)
|
|
117
|
+
frames = [np.full((32, 32, 3), fill_value=i, dtype=np.uint8) for i in range(10)]
|
|
118
|
+
|
|
119
|
+
# Save them to file
|
|
120
|
+
# There are more complex video writing parameters that can be tuned - see the docstring
|
|
121
|
+
# for details.
|
|
122
|
+
write_frames_to_video("example.mp4", frames, fps=25.0)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Notes: the writer verifies that all frames share the same (height, width). FFmpeg can
|
|
126
|
+
automatically resize frames to meet codec alignment requirements; for deterministic
|
|
127
|
+
results, use dimensions divisible by 16.
|
|
128
|
+
|
|
129
|
+
### Using the PyTorch dataset and dataloader
|
|
130
|
+
|
|
131
|
+
The `VideoCollectionDataset` iterates frames either from video files or from directories containing individual image frames.
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from pvio.torch import VideoCollectionDataset, VideoCollectionDataLoader
|
|
135
|
+
|
|
136
|
+
# Initialize Dataset from video files
|
|
137
|
+
paths = ["/path/to/video1.mp4", "/path/to/video2.mp4"]
|
|
138
|
+
ds = VideoCollectionDataset(paths)
|
|
139
|
+
# ... or from directories containing individual frames as images
|
|
140
|
+
paths = ["/path/to/frames_dir1", "/path/to/frames_dir2"]
|
|
141
|
+
# To control sorting of frame files within each dir, use the `frame_sorting` argument
|
|
142
|
+
# (see docstring for details)
|
|
143
|
+
ds = VideoCollectionDataset(paths, as_image_dirs=True)
|
|
144
|
+
|
|
145
|
+
# Wrap in the special DataLoader
|
|
146
|
+
# (you can add other DataLoader keyword arguments if you wish)
|
|
147
|
+
loader = VideoCollectionDataLoader(ds, batch_size=8, num_workers=4)
|
|
148
|
+
|
|
149
|
+
# Now you can iterate over all frames from all videos in a single iterator. Behind the
|
|
150
|
+
# scenes, these frames are fetched in parallel (each worker handles one video at a time)
|
|
151
|
+
for batch in loader:
|
|
152
|
+
frames = batch["frames"] # torch.Tensor: B x C x H x W
|
|
153
|
+
video_paths = batch["video_paths"] # list of Path (input paths are converted to Path)
|
|
154
|
+
frame_indices = batch["frame_indices"] # list of int
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
When loading from video files (as_image_dirs=False), the dataset uses `torchcodec`'s `VideoDecoder` to decode frames and `get_video_metadata` to build per-video frame counts; you may want to enable caching if you index many large files.
|
|
158
|
+
|
|
159
|
+
## Testing
|
|
160
|
+
|
|
161
|
+
The test suite uses pytest. Run it from the repository root:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pytest tests
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
There are a few tests that write small MP4 files using imageio/ffmpeg; ensure `ffmpeg` is available in the environment where tests run.
|
|
168
|
+
|
|
169
|
+
## Notes & troubleshooting
|
|
170
|
+
|
|
171
|
+
- FFmpeg macroblock constraints: some ffmpeg builds require frame dimensions to be divisible by 16. If you see a warning about `macro_block_size=16` and unexpected resizing, choose frame sizes divisible by 16 in production pipelines.
|
|
172
|
+
- If you plan to decode many large videos, enabling metadata caching (the package writes a `.metadata.json` next to each video when `get_video_metadata` is called) will speed up repeated indexing.
|
|
173
|
+
- The PyTorch loader expects the dataset passed to `VideoCollectionDataLoader` to be an instance of `VideoCollectionDataset` and enforces the built-in collate function.
|
|
174
|
+
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# parallel-video-io
|
|
2
|
+
|
|
3
|
+
Tools for reading and writing videos and for efficient frame-level loading with PyTorch.
|
|
4
|
+
|
|
5
|
+
This repository provides small, focused utilities around video I/O and a PyTorch-friendly iterable dataset + dataloader that make it easy to stream frames from many videos or directories of image frames in parallel.
|
|
6
|
+
|
|
7
|
+
## Key features
|
|
8
|
+
- Read frames from videos (random access or sequential) using imageio/ffmpeg.
|
|
9
|
+
- Write sequences of numpy frames to H.264 MP4 files with sane defaults.
|
|
10
|
+
- PyTorch-compatible `VideoCollectionDataset` and `VideoCollectionDataLoader` that provide a simple iterator that uses multiple processes to load data from different videos under the hood. This is especially handy for running trained deep learning models on many videos in production.
|
|
11
|
+
|
|
12
|
+
## Table of contents
|
|
13
|
+
- [Installation](#installation)
|
|
14
|
+
- [Quick examples](#quick-examples)
|
|
15
|
+
- [Reading video metadata](#reading-video-metadata)
|
|
16
|
+
- [Reading video frames](#reading-video-frames)
|
|
17
|
+
- [Writing a video](#writing-a-video)
|
|
18
|
+
- [Using the PyTorch dataset and dataloader](#using-the-pytorch-dataset-and-dataloader)
|
|
19
|
+
- [Testing](#testing)
|
|
20
|
+
- [Notes & troubleshooting](#notes--troubleshooting)
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
This project targets Python >= 3.10. The library's runtime dependencies are listed in `pyproject.toml` (torch, imageio, imageio-ffmpeg, torchcodec, joblib, tqdm, numpy, pytest).
|
|
25
|
+
|
|
26
|
+
If you're using pip in a development environment, install editable with:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install -e .
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Or with Poetry:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
poetry install
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
You can include this package as a dependency for your project by including the following in your `pyproject.toml`:
|
|
39
|
+
|
|
40
|
+
```toml
|
|
41
|
+
[project]
|
|
42
|
+
# ... other stuff
|
|
43
|
+
dependencies = [
|
|
44
|
+
# ...
|
|
45
|
+
"parallel-video-io @ git+https://github.com/sibocw/parallel-video-io.git",
|
|
46
|
+
]
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Make sure `ffmpeg` is available on your `$PATH` (required by imageio-ffmpeg).
|
|
50
|
+
|
|
51
|
+
## Quick examples
|
|
52
|
+
|
|
53
|
+
These examples use NumPy arrays for frames in (height, width, channels) order and uint8 dtype.
|
|
54
|
+
|
|
55
|
+
### Reading video metadata
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from pvio.video_io import get_video_metadata, check_num_frames
|
|
59
|
+
|
|
60
|
+
# To get the number of frames in a video
|
|
61
|
+
n_frames = check_num_frames("example.mp4")
|
|
62
|
+
print(n_frames) # this is an integer frame count
|
|
63
|
+
|
|
64
|
+
# To get more information
|
|
65
|
+
# Note that this function caches this information in a JSON file. To control
|
|
66
|
+
# whether you want to save the cache file or disregard existing cache files, set the
|
|
67
|
+
# `cache_metadata` (default True) and `use_cached_metadata` (default True) arguments.
|
|
68
|
+
meta = get_video_metadata("example.mp4")
|
|
69
|
+
print(meta) # meta is a dictionary containing the keys "n_frames", "frame_size", "fps"
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Reading video frames
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from pvio.video_io import read_frames_from_video
|
|
76
|
+
|
|
77
|
+
# You can read a whole video
|
|
78
|
+
frames, fps = read_frames_from_video("example.mp4")
|
|
79
|
+
|
|
80
|
+
# ... or just some frames
|
|
81
|
+
frames, fps = read_frames_from_video("example.mp4", frame_indices=[0, 5])
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Writing a video
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import numpy as np
|
|
88
|
+
from pvio.video_io import write_frames_to_video
|
|
89
|
+
|
|
90
|
+
# Create dummy 32x32 RGB frames (H, W, C)
|
|
91
|
+
frames = [np.full((32, 32, 3), fill_value=i, dtype=np.uint8) for i in range(10)]
|
|
92
|
+
|
|
93
|
+
# Save them to file
|
|
94
|
+
# There are more complex video writing parameters that can be tuned - see the docstring
|
|
95
|
+
# for details.
|
|
96
|
+
write_frames_to_video("example.mp4", frames, fps=25.0)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Notes: the writer verifies that all frames share the same (height, width). FFmpeg can
|
|
100
|
+
automatically resize frames to meet codec alignment requirements; for deterministic
|
|
101
|
+
results, use dimensions divisible by 16.
|
|
102
|
+
|
|
103
|
+
### Using the PyTorch dataset and dataloader
|
|
104
|
+
|
|
105
|
+
The `VideoCollectionDataset` iterates frames either from video files or from directories containing individual image frames.
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from pvio.torch import VideoCollectionDataset, VideoCollectionDataLoader
|
|
109
|
+
|
|
110
|
+
# Initialize Dataset from video files
|
|
111
|
+
paths = ["/path/to/video1.mp4", "/path/to/video2.mp4"]
|
|
112
|
+
ds = VideoCollectionDataset(paths)
|
|
113
|
+
# ... or from directories containing individual frames as images
|
|
114
|
+
paths = ["/path/to/frames_dir1", "/path/to/frames_dir2"]
|
|
115
|
+
# To control sorting of frame files within each dir, use the `frame_sorting` argument
|
|
116
|
+
# (see docstring for details)
|
|
117
|
+
ds = VideoCollectionDataset(paths, as_image_dirs=True)
|
|
118
|
+
|
|
119
|
+
# Wrap in the special DataLoader
|
|
120
|
+
# (you can add other DataLoader keyword arguments if you wish)
|
|
121
|
+
loader = VideoCollectionDataLoader(ds, batch_size=8, num_workers=4)
|
|
122
|
+
|
|
123
|
+
# Now you can iterate over all frames from all videos in a single iterator. Behind the
|
|
124
|
+
# scenes, these frames are fetched in parallel (each worker handles one video at a time)
|
|
125
|
+
for batch in loader:
|
|
126
|
+
frames = batch["frames"] # torch.Tensor: B x C x H x W
|
|
127
|
+
video_paths = batch["video_paths"] # list of Path (input paths are converted to Path)
|
|
128
|
+
frame_indices = batch["frame_indices"] # list of int
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
When loading from video files (as_image_dirs=False), the dataset uses `torchcodec`'s `VideoDecoder` to decode frames and `get_video_metadata` to build per-video frame counts; you may want to enable caching if you index many large files.
|
|
132
|
+
|
|
133
|
+
## Testing
|
|
134
|
+
|
|
135
|
+
The test suite uses pytest. Run it from the repository root:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
pytest tests
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
There are a few tests that write small MP4 files using imageio/ffmpeg; ensure `ffmpeg` is available in the environment where tests run.
|
|
142
|
+
|
|
143
|
+
## Notes & troubleshooting
|
|
144
|
+
|
|
145
|
+
- FFmpeg macroblock constraints: some ffmpeg builds require frame dimensions to be divisible by 16. If you see a warning about `macro_block_size=16` and unexpected resizing, choose frame sizes divisible by 16 in production pipelines.
|
|
146
|
+
- If you plan to decode many large videos, enabling metadata caching (the package writes a `.metadata.json` next to each video when `get_video_metadata` is called) will speed up repeated indexing.
|
|
147
|
+
- The PyTorch loader expects the dataset passed to `VideoCollectionDataLoader` to be an instance of `VideoCollectionDataset` and enforces the built-in collate function.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "parallel-video-io"
|
|
3
|
+
version = "0.1.3"
|
|
4
|
+
description = "Tools for reading and writing videos, and loading them efficiently with PyTorch."
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Sibo Wang-Chen", email = "sibo.wang@epfl.ch"}
|
|
7
|
+
]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
license = {text = "MIT"}
|
|
10
|
+
requires-python = ">=3.10,<=3.13"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"torch>=2.8,<3",
|
|
13
|
+
"imageio>=2.37.0,<3",
|
|
14
|
+
"imageio-ffmpeg==0.6.0",
|
|
15
|
+
"torchcodec==0.7.0",
|
|
16
|
+
"joblib>=1.5.1,<2",
|
|
17
|
+
"tqdm>=4.67,<5",
|
|
18
|
+
"numpy>=2.0,<3",
|
|
19
|
+
"pytest>=8.4.2,<9",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
repository = "https://github.com/sibocw/parallel-video-io"
|
|
24
|
+
|
|
25
|
+
[tool.poetry]
|
|
26
|
+
packages = [{include = "pvio", from = "src"}]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
[build-system]
|
|
30
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
31
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
import imageio.v2 as imageio
|
|
5
|
+
from typing import Callable
|
|
6
|
+
from torchcodec.decoders import VideoDecoder
|
|
7
|
+
from torch.utils.data import IterableDataset, DataLoader, get_worker_info
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from tqdm import tqdm
|
|
10
|
+
from joblib import Parallel, delayed
|
|
11
|
+
|
|
12
|
+
from .video_io import get_video_metadata
|
|
13
|
+
from .util import balance_load_lpt
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class VideoCollectionDataset(IterableDataset):
|
|
17
|
+
"""Yields individual frames from Spotlight behavior recordings, either
|
|
18
|
+
from videos or from image sequences."""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
paths: list[Path | str],
|
|
23
|
+
as_image_dirs: bool = False,
|
|
24
|
+
frame_sorting: None | str = None,
|
|
25
|
+
transform: Callable | None = None,
|
|
26
|
+
):
|
|
27
|
+
r"""
|
|
28
|
+
Args:
|
|
29
|
+
paths (list[Path]): List of to video paths, or directories
|
|
30
|
+
containing frames as individual images.
|
|
31
|
+
as_image_dirs (bool): If True, treat each path as a directory
|
|
32
|
+
containing individual frames. Otherwise, treat it as a
|
|
33
|
+
video file.
|
|
34
|
+
frame_sorting (str | None): When `as_image_dirs` is True, this
|
|
35
|
+
argument specifies how images within each directory should
|
|
36
|
+
be sorted. If None, files are sorted by name. If given as a
|
|
37
|
+
string, it is used as a regex pattern to extract frame
|
|
38
|
+
numbers from filenames (e.g. r"frame\D*(\d+)(?!\d)").
|
|
39
|
+
When `as_image_dirs` is False, this argument is ignored.
|
|
40
|
+
transform (Callable | None): A function that is to be applied
|
|
41
|
+
to each frame after loading. Note that the following
|
|
42
|
+
operations are already applied to each frame:
|
|
43
|
+
(i) conversion from numpy array to torch tensor,
|
|
44
|
+
(ii) conversion from HWC to CHW format, and
|
|
45
|
+
(iii) conversion from uint8 in [0, 255] to float in [0, 1].
|
|
46
|
+
The transform function, if provided, is applied after these
|
|
47
|
+
operations.
|
|
48
|
+
"""
|
|
49
|
+
self.video_paths = [Path(p) for p in paths]
|
|
50
|
+
self.worker_assignments = None
|
|
51
|
+
self.as_image_dirs = as_image_dirs
|
|
52
|
+
self.frame_sorting = frame_sorting
|
|
53
|
+
self.n_frames_lookup = None # Populated by assign_workers()
|
|
54
|
+
self.transform = transform
|
|
55
|
+
|
|
56
|
+
# Check if the paths are all valid
|
|
57
|
+
for p in self.video_paths:
|
|
58
|
+
if self.as_image_dirs:
|
|
59
|
+
if not p.is_dir():
|
|
60
|
+
raise ValueError(
|
|
61
|
+
f"One of the specified paths {p} is not a valid directory. "
|
|
62
|
+
"Directories containing individual frame images are expected."
|
|
63
|
+
)
|
|
64
|
+
else:
|
|
65
|
+
if not p.is_file():
|
|
66
|
+
raise ValueError(
|
|
67
|
+
f"One of the specified paths {p} is not a valid file. "
|
|
68
|
+
"Video files are expected."
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Sort images if we're loading from directories of images
|
|
72
|
+
self.frame_sortings = {}
|
|
73
|
+
regex = re.compile(frame_sorting) if frame_sorting else None
|
|
74
|
+
if as_image_dirs:
|
|
75
|
+
# Iterate over the canonical Path objects (self.video_paths) so we
|
|
76
|
+
# consistently store Path keys and avoid relying on caller types
|
|
77
|
+
for path in self.video_paths:
|
|
78
|
+
all_files = [f for f in path.iterdir() if f.is_file()]
|
|
79
|
+
if regex is None:
|
|
80
|
+
sorting_func = lambda f: f.name
|
|
81
|
+
else:
|
|
82
|
+
sorting_func = lambda f: self._extract_frame_number(f.name, regex)
|
|
83
|
+
# Store a new sorted list (list.sort() returns None)
|
|
84
|
+
self.frame_sortings[path] = sorted(all_files, key=sorting_func)
|
|
85
|
+
|
|
86
|
+
def assign_workers(
|
|
87
|
+
self, n_frame_loading_workers: int, n_metadata_indexing_workers: int = -1
|
|
88
|
+
):
|
|
89
|
+
# Check how many frame loading workers we're actually using (e.g. -1 actually
|
|
90
|
+
# means all available cores, so we need to figure out how many that is)
|
|
91
|
+
n_frame_loading_workers_effective = Parallel(
|
|
92
|
+
n_jobs=n_frame_loading_workers
|
|
93
|
+
)._effective_n_jobs()
|
|
94
|
+
logging.info(
|
|
95
|
+
f"Caller specified {n_frame_loading_workers} workers for frame loading. "
|
|
96
|
+
f"This is effectively {n_frame_loading_workers_effective} workers."
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Figure out how many frames there are in each video. This allows us to split
|
|
100
|
+
# the workload more evenly among workers by the number of frames.
|
|
101
|
+
if self.as_image_dirs:
|
|
102
|
+
self.n_frames_lookup = {
|
|
103
|
+
path: len(frames) for path, frames in self.frame_sortings.items()
|
|
104
|
+
}
|
|
105
|
+
else:
|
|
106
|
+
# Count frames in videos. This requires partially decoding the video files
|
|
107
|
+
# and it can be quite slow, so we do it in parallel and use caches.
|
|
108
|
+
logging.info(
|
|
109
|
+
f"Loading metadata for {len(self.video_paths)} videos. "
|
|
110
|
+
"This may take a while if no cached metadata is available."
|
|
111
|
+
)
|
|
112
|
+
metas = Parallel(n_jobs=n_metadata_indexing_workers)(
|
|
113
|
+
delayed(get_video_metadata)(path)
|
|
114
|
+
for path in tqdm(self.video_paths, desc="Indexing videos", disable=None)
|
|
115
|
+
)
|
|
116
|
+
self.n_frames_lookup = {
|
|
117
|
+
path: meta["n_frames"] for path, meta in zip(self.video_paths, metas)
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
# Split videos evenly among the available workers
|
|
121
|
+
self.worker_assignments = balance_load_lpt(
|
|
122
|
+
self.n_frames_lookup, max(1, n_frame_loading_workers_effective)
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
def __iter__(self):
|
|
126
|
+
# Get worker info for distributed loading
|
|
127
|
+
worker_info = get_worker_info()
|
|
128
|
+
if worker_info is None:
|
|
129
|
+
# Single process
|
|
130
|
+
video_subset = self.video_paths
|
|
131
|
+
else:
|
|
132
|
+
# Split videos among workers
|
|
133
|
+
video_subset = self.worker_assignments[worker_info.id]
|
|
134
|
+
|
|
135
|
+
# Each worker sequentially decodes its assigned videos
|
|
136
|
+
for video_path in video_subset:
|
|
137
|
+
if self.as_image_dirs:
|
|
138
|
+
# Read individual images
|
|
139
|
+
frame_files = self.frame_sortings[video_path]
|
|
140
|
+
for frame_idx, frame_file in enumerate(frame_files):
|
|
141
|
+
frame = imageio.imread(frame_file)
|
|
142
|
+
frame = torch.from_numpy(frame)
|
|
143
|
+
if frame.ndim == 2:
|
|
144
|
+
frame = frame.unsqueeze(-1) # add channel dim
|
|
145
|
+
frame = frame.permute(2, 0, 1) # HWC to CHW
|
|
146
|
+
frame = frame.float() / 255.0 # to float in [0, 1]
|
|
147
|
+
if self.transform:
|
|
148
|
+
frame = self.transform(frame)
|
|
149
|
+
yield {
|
|
150
|
+
"frame": frame,
|
|
151
|
+
"video_path": video_path,
|
|
152
|
+
"frame_idx": frame_idx,
|
|
153
|
+
}
|
|
154
|
+
else:
|
|
155
|
+
# Use torchcodec to decode videos
|
|
156
|
+
decoder = VideoDecoder(video_path)
|
|
157
|
+
for frame_idx in range(len(decoder)):
|
|
158
|
+
frame = decoder[frame_idx] # returns tensor in CHW
|
|
159
|
+
frame = frame.float() / 255.0 # to float in [0, 1]
|
|
160
|
+
if self.transform:
|
|
161
|
+
frame = self.transform(frame)
|
|
162
|
+
yield {
|
|
163
|
+
"frame": frame,
|
|
164
|
+
"video_path": video_path,
|
|
165
|
+
"frame_idx": frame_idx,
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
def __len__(self):
|
|
169
|
+
if self.n_frames_lookup is None:
|
|
170
|
+
raise ValueError(
|
|
171
|
+
"VideoCollectionDataset length is unknown until workers are assigned. "
|
|
172
|
+
"Call `assign_workers()` before using `len()`."
|
|
173
|
+
)
|
|
174
|
+
return sum(self.n_frames_lookup.values())
|
|
175
|
+
|
|
176
|
+
@staticmethod
|
|
177
|
+
def _extract_frame_number(filename: str, regex_pattern: str) -> int:
|
|
178
|
+
matches = re.findall(regex_pattern, filename)
|
|
179
|
+
if len(matches) != 1:
|
|
180
|
+
raise ValueError(
|
|
181
|
+
f"{len(matches)} matches found in filename {filename} "
|
|
182
|
+
f"using regex pattern {regex_pattern}. Only one match is expected."
|
|
183
|
+
)
|
|
184
|
+
try:
|
|
185
|
+
return int(matches[0])
|
|
186
|
+
except ValueError as e:
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"Failed to parse '{matches[0]}' as int. This substring is extracted "
|
|
189
|
+
f"from filename {filename} using regex pattern {regex_pattern}."
|
|
190
|
+
) from e
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class VideoCollectionDataLoader(DataLoader):
    """DataLoader for `VideoCollectionDataset` that batches frame dicts.

    The loader always uses its own collate function and, after construction,
    calls `dataset.assign_workers()` with the loader's `num_workers` so that
    videos are split among the loading workers.
    """

    def __init__(self, dataset: "VideoCollectionDataset", **kwargs):
        """
        Args:
            dataset (VideoCollectionDataset): The dataset to load from.
            **kwargs: Other keyword arguments forwarded to
                `torch.utils.data.DataLoader`. `batch_sampler` and
                `collate_fn` must not be set, and `batch_size` must not be
                None.

        Raises:
            ValueError: If `dataset` is not a `VideoCollectionDataset` or if
                one of the unsupported options above is given.
        """
        if not isinstance(dataset, VideoCollectionDataset):
            raise ValueError(
                "VideoCollectionDataLoader only works with VideoCollectionDataset."
            )
        if kwargs.get("batch_sampler") is not None:
            raise ValueError(
                "VideoCollectionDataLoader does not support custom batch samplers."
            )
        if kwargs.get("collate_fn") is not None:
            raise ValueError(
                "VideoCollectionDataLoader must use the built-in collate function."
            )
        if "batch_size" in kwargs and kwargs["batch_size"] is None:
            # batch_size=None disables automatic batching, in which case the
            # collate function would receive a single frame dict instead of a
            # list of frame dicts and fail with an unrelated error.
            raise ValueError(
                "VideoCollectionDataLoader requires automatic batching; "
                "batch_size cannot be None."
            )

        kwargs["collate_fn"] = self._collate
        super().__init__(dataset, **kwargs)

        # num_workers is only known after DataLoader.__init__ has run
        self.dataset.assign_workers(n_frame_loading_workers=self.num_workers)

    @staticmethod
    def _collate(batch):
        """Receives a list of frame dicts, returns a batched dict.

        The returned dict has the keys "frames" (frame tensors stacked along
        a new first dimension), "video_paths" and "frame_indices" (lists in
        the same order as `batch`).
        """
        return {
            "frames": torch.stack([item["frame"] for item in batch]),
            "video_paths": [item["video_path"] for item in batch],
            "frame_indices": [item["frame_idx"] for item in batch],
        }
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from typing import Hashable
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def balance_load_lpt(
    tasks: dict[Hashable, int], n_workers: int
) -> list[list[Hashable]]:
    """The Longest Processing Time (LPT) algorithm for load balancing: sort
    tasks by decreasing duration and assign each task to the worker with
    the currently smallest total assigned load.

    Ties between equally loaded workers go to the worker with the lowest
    index; tasks with equal durations keep the order they have in `tasks`.

    Args:
        tasks (dict[Hashable, int]): A dict mapping task identifiers (can
            be any hashable type) to their estimated durations.
        n_workers (int): Number of workers to distribute tasks across.

    Returns:
        assignments (list[list[Hashable]]): A list of lists, where
            assignments[i] contains the IDs of tasks assigned to worker i.

    Raises:
        ValueError: If `n_workers` is negative, or if it is 0 while there
            are tasks to assign.
    """
    if n_workers < 0 or (n_workers == 0 and tasks):
        raise ValueError(
            f"Cannot distribute {len(tasks)} tasks across {n_workers} workers. "
            "At least one worker is required."
        )

    # Sort tasks by descending duration
    sorted_tasks = sorted(tasks.items(), key=lambda x: x[1], reverse=True)

    # Initialize workers and their current loads (plain numbers keep integer
    # durations exact)
    worker_loads = [0] * n_workers
    assignments = [[] for _ in range(n_workers)]

    # Assign each task to the currently least-loaded worker; min() returns the
    # first (lowest-index) worker among equally loaded ones
    for task_id, duration in sorted_tasks:
        i = min(range(n_workers), key=worker_loads.__getitem__)
        assignments[i].append(task_id)
        worker_loads[i] += duration

    return assignments
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import imageio.v2 as imageio
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def read_frames_from_video(
    video_path: Path | str, frame_indices: list[int] | None = None
) -> tuple[list[np.ndarray], float]:
    """Load selected frames (or every frame) of a video with imageio.

    Errors raised by imageio while opening the file or fetching a frame are
    not caught here and propagate to the caller.

    Args:
        video_path (Path | str): Path to the video file.
        frame_indices (list[int] | None): Indices of the frames to read, in
            the order in which they should be returned. If None, all frames
            are read in order.

    Returns:
        frames (list[np.ndarray]): The requested frames as returned by the
            imageio reader.
        fps (float): The "fps" entry of the reader's metadata (None if the
            metadata has no such entry).
    """
    with imageio.get_reader(video_path) as reader:
        # Default to every frame index reported by the reader
        if frame_indices is None:
            wanted = range(reader.count_frames())
        else:
            wanted = frame_indices
        frames = [reader.get_data(frame_idx) for frame_idx in wanted]
        fps = reader.get_meta_data().get("fps")
    return frames, fps
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
_default_ffmpeg_params_for_video_writing = [
|
|
37
|
+
"-crf",
|
|
38
|
+
"15", # Lower CRF = higher quality (15 is very high quality)
|
|
39
|
+
"-preset",
|
|
40
|
+
"slow", # Slower preset = better compression efficiency
|
|
41
|
+
"-profile:v",
|
|
42
|
+
"high", # Use high profile for better compression
|
|
43
|
+
"-level",
|
|
44
|
+
"4.0", # H.264 level
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def write_frames_to_video(
    video_path: Path | str,
    frames: list[np.ndarray],
    fps: float,
    codec: str = "libx264",
    ffmpeg_params: list[str] = _default_ffmpeg_params_for_video_writing,
    log_interval: int | None = None,
    log_level: int = logging.INFO,
) -> None:
    """Write a sequence of frames to a video file.

    Args:
        video_path (Path | str): Path to save the video file.
        frames (list[np.ndarray]): List of frames as numpy arrays (in
            [height, width, channels] format).
        fps (float): Frames per second for the output video.
        codec (str): Codec to use. Default: 'libx264'.
        ffmpeg_params (list[str]): Additional ffmpeg parameters.
            Default is a set of parameters for high-quality H.264 encoding.
            (see _default_ffmpeg_params_for_video_writing). A copy of the
            list is handed to the writer, so the caller's list (and the
            shared module-level default) is never modified.
        log_interval (int | None): If set, log progress every
            `log_interval` frames at the specified log level. Must not
            be 0.
        log_level (int): Logging level for progress. Default: logging.INFO.

    Raises:
        ValueError: If no frames are given, if the frames do not all share
            the same first two dimensions, or if `log_interval` is 0.
    """
    if len(frames) == 0:
        raise ValueError("No frames provided to write_frames_to_video")

    # Reject a zero interval before any file is created: `i % 0` would
    # otherwise raise ZeroDivisionError after the first frame was written,
    # leaving a truncated video behind.
    if log_interval == 0:
        raise ValueError("log_interval must be a non-zero integer or None")

    # Check frame size consistency
    frame_size = frames[0].shape[:2]
    for frame in frames:
        if frame.shape[:2] != frame_size:
            raise ValueError(
                "All frames must have the same dimensions. The 0th frame has size "
                f"{frame_size}, but at least one frame has size {frame.shape[:2]}."
            )

    n_frames = len(frames)

    # Use imageio to write video with ffmpeg backend
    with imageio.get_writer(
        str(video_path),
        "ffmpeg",
        fps=fps,
        codec=codec,
        quality=None,  # Use CRF (in ffmpeg_params) instead of quality
        # Defensive copy: the default is a module-level list shared by
        # every call, so never hand the original object to the backend.
        ffmpeg_params=None if ffmpeg_params is None else list(ffmpeg_params),
    ) as video_writer:
        for i, frame in enumerate(frames):
            video_writer.append_data(frame)

            if log_interval is not None and i % log_interval == 0:
                # Lazy %-formatting: the message is only built when the
                # record is actually emitted.
                logging.log(log_level, "Written frame %d/%d", i + 1, n_frames)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def check_num_frames(video_path: Path | str) -> int:
|
|
100
|
+
"""Check number of frames in a video file."""
|
|
101
|
+
try:
|
|
102
|
+
with imageio.get_reader(video_path) as reader:
|
|
103
|
+
num_frames = reader.count_frames()
|
|
104
|
+
except Exception as e:
|
|
105
|
+
raise RuntimeError(f"Failed to open video file: {video_path}") from e
|
|
106
|
+
return num_frames
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def get_video_metadata(
|
|
110
|
+
video_path: Path | str,
|
|
111
|
+
cache_metadata: bool = True,
|
|
112
|
+
use_cached_metadata: bool = True,
|
|
113
|
+
metadata_suffix: str = ".metadata.json",
|
|
114
|
+
):
|
|
115
|
+
"""Get number of frames, frame size, and FPS of a video file.
|
|
116
|
+
|
|
117
|
+
Args:
|
|
118
|
+
video_path (Path | str): Path to the video file.
|
|
119
|
+
cache_metadata (bool): Whether to cache the metadata to a JSON
|
|
120
|
+
file. Default is True.
|
|
121
|
+
use_cached_metadata (bool): Whether to use cached metadata if
|
|
122
|
+
available. Default is True.
|
|
123
|
+
metadata_suffix (str): Suffix to use for the metadata cache file.
|
|
124
|
+
Default is ".metadata.json".
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
dict: A dictionary containing the video metadata.
|
|
128
|
+
"""
|
|
129
|
+
metadata = {}
|
|
130
|
+
|
|
131
|
+
video_path = Path(video_path)
|
|
132
|
+
cache_path = video_path.with_suffix(metadata_suffix)
|
|
133
|
+
if use_cached_metadata and cache_path.is_file():
|
|
134
|
+
try:
|
|
135
|
+
with open(cache_path, "r") as f:
|
|
136
|
+
metadata = json.load(f)
|
|
137
|
+
n_frames = metadata["n_frames"]
|
|
138
|
+
frame_size = tuple(metadata["frame_size"])
|
|
139
|
+
fps = metadata["fps"]
|
|
140
|
+
except Exception as e:
|
|
141
|
+
print(f"Corrupted metadata cache file {cache_path}")
|
|
142
|
+
raise e
|
|
143
|
+
else:
|
|
144
|
+
n_frames = check_num_frames(video_path)
|
|
145
|
+
sample_frames, fps = read_frames_from_video(
|
|
146
|
+
video_path, frame_indices=[0]
|
|
147
|
+
)
|
|
148
|
+
frame_size = sample_frames[0].shape[:2]
|
|
149
|
+
|
|
150
|
+
if cache_metadata:
|
|
151
|
+
metadata = {
|
|
152
|
+
"n_frames": n_frames,
|
|
153
|
+
"frame_size": list(frame_size),
|
|
154
|
+
"fps": fps,
|
|
155
|
+
}
|
|
156
|
+
with open(cache_path, "w") as f:
|
|
157
|
+
json.dump(metadata, f, indent=2)
|
|
158
|
+
|
|
159
|
+
return {"n_frames": n_frames, "frame_size": frame_size, "fps": fps}
|