ferroload 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ferroload-0.1.0/PKG-INFO +98 -0
- ferroload-0.1.0/README.md +66 -0
- ferroload-0.1.0/ferroload-codec/Cargo.toml +19 -0
- ferroload-0.1.0/ferroload-codec/src/audio_wav.rs +134 -0
- ferroload-0.1.0/ferroload-codec/src/image_codec.rs +80 -0
- ferroload-0.1.0/ferroload-codec/src/lib.rs +111 -0
- ferroload-0.1.0/ferroload-codec/src/sampling.rs +65 -0
- ferroload-0.1.0/ferroload-codec/src/tensor.rs +42 -0
- ferroload-0.1.0/ferroload-codec/src/video.rs +177 -0
- ferroload-0.1.0/ferroload-core/Cargo.toml +18 -0
- ferroload-0.1.0/ferroload-core/examples/synthetic_av.rs +92 -0
- ferroload-0.1.0/ferroload-core/src/dataset.rs +907 -0
- ferroload-0.1.0/ferroload-core/src/error.rs +45 -0
- ferroload-0.1.0/ferroload-core/src/index.rs +178 -0
- ferroload-0.1.0/ferroload-core/src/index_parquet.rs +357 -0
- ferroload-0.1.0/ferroload-core/src/lib.rs +56 -0
- ferroload-0.1.0/ferroload-core/src/manifest.rs +272 -0
- ferroload-0.1.0/ferroload-core/src/sampler.rs +192 -0
- ferroload-0.1.0/ferroload-core/src/shard.rs +195 -0
- ferroload-0.1.0/ferroload-core/src/sideindex.rs +68 -0
- ferroload-0.1.0/ferroload-core/src/subset.rs +336 -0
- ferroload-0.1.0/ferroload-core/tests/combinations.rs +140 -0
- ferroload-0.1.0/ferroload-core/tests/integration.rs +102 -0
- ferroload-0.1.0/ferroload-core/tests/layers.rs +182 -0
- ferroload-0.1.0/ferroload-py/Cargo.lock +732 -0
- ferroload-0.1.0/ferroload-py/Cargo.toml +25 -0
- ferroload-0.1.0/ferroload-py/README.md +66 -0
- ferroload-0.1.0/ferroload-py/src/lib.rs +705 -0
- ferroload-0.1.0/pyproject.toml +50 -0
- ferroload-0.1.0/python/ferroload/__init__.py +33 -0
- ferroload-0.1.0/python/ferroload/cli.py +199 -0
- ferroload-0.1.0/python/ferroload/dataset.py +413 -0
- ferroload-0.1.0/python/ferroload/executor.py +236 -0
- ferroload-0.1.0/python/ferroload/loader.py +409 -0
ferroload-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ferroload
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Classifier: Development Status :: 4 - Beta
|
|
5
|
+
Classifier: Intended Audience :: Developers
|
|
6
|
+
Classifier: Intended Audience :: Science/Research
|
|
7
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Programming Language :: Rust
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
14
|
+
Requires-Dist: datasets>=2.14 ; extra == 'hf'
|
|
15
|
+
Requires-Dist: pillow>=9 ; extra == 'hf'
|
|
16
|
+
Requires-Dist: huggingface-hub>=0.20 ; extra == 'hf'
|
|
17
|
+
Requires-Dist: torch>=2.0 ; extra == 'torch'
|
|
18
|
+
Requires-Dist: numpy>=1.21 ; extra == 'torch'
|
|
19
|
+
Provides-Extra: hf
|
|
20
|
+
Provides-Extra: torch
|
|
21
|
+
Summary: A pure-Rust multimodal dataset format + dataloader for PyTorch.
|
|
22
|
+
Keywords: pytorch,dataloader,dataset,multimodal,rust,machine-learning,computer-vision,video,audio
|
|
23
|
+
Author: Midhun
|
|
24
|
+
License: Apache-2.0
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
27
|
+
Project-URL: Changelog, https://github.com/USERNAME/ferroload-rs/blob/main/CHANGELOG.md
|
|
28
|
+
Project-URL: Documentation, https://USERNAME.github.io/ferroload-rs/
|
|
29
|
+
Project-URL: Homepage, https://github.com/USERNAME/ferroload-rs
|
|
30
|
+
Project-URL: Repository, https://github.com/USERNAME/ferroload-rs
|
|
31
|
+
|
|
32
|
+
# ferroload
|
|
33
|
+
|
|
34
|
+
**A pure-Rust multimodal dataset format + dataloader for PyTorch.**
|
|
35
|
+
|
|
36
|
+
Ferroload stores images, video, audio, tensors, and rich metadata in a
|
|
37
|
+
self-contained, shardable on-disk format and serves them to training loops with
|
|
38
|
+
parallel decode in Rust (the GIL released), SQL-style subsetting, and a one-call
|
|
39
|
+
loader that drops straight into PyTorch.
|
|
40
|
+
|
|
41
|
+
## Install
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install ferroload
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Optional extras:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install "ferroload[hf]" # HuggingFace import tooling (datasets, pillow, hub)
|
|
51
|
+
pip install "ferroload[torch]" # torch + numpy, for the DataLoader glue
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
In-Rust video decode is feature-gated (it needs system ffmpeg) and is **not** in
|
|
55
|
+
the published wheel — build from source for it:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
maturin develop --release --features video
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Quickstart
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from ferroload import make_loader
|
|
65
|
+
|
|
66
|
+
dl = make_loader("/data/ds", batch_size=64,
|
|
67
|
+
columns=["image", "video", "label"], # kinds resolved from the manifest
|
|
68
|
+
resize=(224, 224), out="torch")
|
|
69
|
+
for epoch in range(epochs):
|
|
70
|
+
dl.set_epoch(epoch) # reshuffle (DDP-aware)
|
|
71
|
+
for batch in dl:
|
|
72
|
+
train_step(batch) # batch["image"], batch["video"], batch["label"]
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Enrich a dataset with an additive, resumable layer — functions bind to inputs
|
|
76
|
+
positionally and run once per sample by default, so they're generic:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import ferroload
|
|
80
|
+
|
|
81
|
+
def mean_color(img): # img <- inputs=["image"]
|
|
82
|
+
return img.mean(axis=(0, 1)).astype("float32")
|
|
83
|
+
|
|
84
|
+
ds = ferroload.Dataset.open("/data/ds")
|
|
85
|
+
ds = ds.map(mean_color, inputs=["image"],
|
|
86
|
+
outputs={"emb": ferroload.Modality("npy")}, name="emb")
|
|
87
|
+
ds.read_array(0, "emb")
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Documentation
|
|
91
|
+
|
|
92
|
+
Full docs (Python API, quickstart, Rust core, benchmarks) are built with MkDocs
|
|
93
|
+
and published via GitHub Pages. See the project repository for links.
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
Apache-2.0.
|
|
98
|
+
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# ferroload
|
|
2
|
+
|
|
3
|
+
**A pure-Rust multimodal dataset format + dataloader for PyTorch.**
|
|
4
|
+
|
|
5
|
+
Ferroload stores images, video, audio, tensors, and rich metadata in a
|
|
6
|
+
self-contained, shardable on-disk format and serves them to training loops with
|
|
7
|
+
parallel decode in Rust (the GIL released), SQL-style subsetting, and a one-call
|
|
8
|
+
loader that drops straight into PyTorch.
|
|
9
|
+
|
|
10
|
+
## Install
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install ferroload
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Optional extras:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install "ferroload[hf]" # HuggingFace import tooling (datasets, pillow, hub)
|
|
20
|
+
pip install "ferroload[torch]" # torch + numpy, for the DataLoader glue
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
In-Rust video decode is feature-gated (it needs system ffmpeg) and is **not** in
|
|
24
|
+
the published wheel — build from source for it:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
maturin develop --release --features video
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quickstart
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from ferroload import make_loader
|
|
34
|
+
|
|
35
|
+
dl = make_loader("/data/ds", batch_size=64,
|
|
36
|
+
columns=["image", "video", "label"], # kinds resolved from the manifest
|
|
37
|
+
resize=(224, 224), out="torch")
|
|
38
|
+
for epoch in range(epochs):
|
|
39
|
+
dl.set_epoch(epoch) # reshuffle (DDP-aware)
|
|
40
|
+
for batch in dl:
|
|
41
|
+
train_step(batch) # batch["image"], batch["video"], batch["label"]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Enrich a dataset with an additive, resumable layer — functions bind to inputs
|
|
45
|
+
positionally and run once per sample by default, so they're generic:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import ferroload
|
|
49
|
+
|
|
50
|
+
def mean_color(img): # img <- inputs=["image"]
|
|
51
|
+
return img.mean(axis=(0, 1)).astype("float32")
|
|
52
|
+
|
|
53
|
+
ds = ferroload.Dataset.open("/data/ds")
|
|
54
|
+
ds = ds.map(mean_color, inputs=["image"],
|
|
55
|
+
outputs={"emb": ferroload.Modality("npy")}, name="emb")
|
|
56
|
+
ds.read_array(0, "emb")
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Documentation
|
|
60
|
+
|
|
61
|
+
Full docs (Python API, quickstart, Rust core, benchmarks) are built with MkDocs
|
|
62
|
+
and published via GitHub Pages. See the project repository for links.
|
|
63
|
+
|
|
64
|
+
## License
|
|
65
|
+
|
|
66
|
+
Apache-2.0.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "ferroload-codec"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
license = "Apache-2.0"
|
|
6
|
+
description = "Modality decoders for Ferroload: image (pure-Rust), audio WAV (pure-Rust), video (ffmpeg/NVDEC, feature-gated)."
|
|
7
|
+
|
|
8
|
+
[features]
|
|
9
|
+
default = ["image-codec", "audio-codec"]
|
|
10
|
+
image-codec = ["dep:image"]
|
|
11
|
+
audio-codec = [] # pure-Rust WAV PCM, no extra deps
|
|
12
|
+
# Video decode links system ffmpeg (libav) / NVDEC and needs clang+pkg-config;
|
|
13
|
+
# not buildable in every environment, so it is strictly opt-in.
|
|
14
|
+
video-ffmpeg = ["dep:ffmpeg-next"]
|
|
15
|
+
video-nvdec = ["video-ffmpeg"]
|
|
16
|
+
|
|
17
|
+
[dependencies]
|
|
18
|
+
image = { version = "0.25", optional = true, default-features = false, features = ["png", "jpeg"] }
|
|
19
|
+
ffmpeg-next = { version = "7", optional = true }
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
//! Pure-Rust WAV/PCM decoder -> `[channels, samples]` F32 in [-1, 1].
|
|
2
|
+
//!
|
|
3
|
+
//! Covers the uncompressed case (and precomputed-feature pipelines). Compressed
|
|
4
|
+
//! formats (mp3/flac/aac) are intended to plug in via Symphonia behind an
|
|
5
|
+
//! `audio-codecs` feature; the [`Codec`] surface is identical.
|
|
6
|
+
|
|
7
|
+
use crate::{Codec, CodecError, Result, Tensor, TensorData};
|
|
8
|
+
|
|
9
|
+
/// Read a little-endian `u16` from `b` at byte offset `o`.
///
/// Panics if `b` does not contain two bytes starting at `o`; callers are
/// expected to bounds-check first.
fn rd_u16(b: &[u8], o: usize) -> u16 {
    let mut word = [0u8; 2];
    word.copy_from_slice(&b[o..o + 2]);
    u16::from_le_bytes(word)
}
|
|
12
|
+
/// Read a little-endian `u32` from `b` at byte offset `o`.
///
/// Panics if `b` does not contain four bytes starting at `o`; callers are
/// expected to bounds-check first.
fn rd_u32(b: &[u8], o: usize) -> u32 {
    // Walk the four bytes most-significant first so each step shifts the
    // accumulator up by one byte.
    b[o..o + 4]
        .iter()
        .rev()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
|
|
15
|
+
|
|
16
|
+
pub struct WavCodec;
|
|
17
|
+
|
|
18
|
+
impl Codec for WavCodec {
|
|
19
|
+
fn decode(&self, b: &[u8]) -> Result<Tensor> {
|
|
20
|
+
if b.len() < 44 || &b[0..4] != b"RIFF" || &b[8..12] != b"WAVE" {
|
|
21
|
+
return Err(CodecError::Decode("not a RIFF/WAVE file".into()));
|
|
22
|
+
}
|
|
23
|
+
// walk chunks to find "fmt " and "data"
|
|
24
|
+
let mut pos = 12;
|
|
25
|
+
let (mut fmt, mut data): (Option<(u16, u16, u16)>, Option<(usize, usize)>) = (None, None);
|
|
26
|
+
while pos + 8 <= b.len() {
|
|
27
|
+
let id = &b[pos..pos + 4];
|
|
28
|
+
let sz = rd_u32(b, pos + 4) as usize;
|
|
29
|
+
let body = pos + 8;
|
|
30
|
+
if id == b"fmt " && body + 16 <= b.len() {
|
|
31
|
+
let audio_format = rd_u16(b, body);
|
|
32
|
+
let channels = rd_u16(b, body + 2);
|
|
33
|
+
let bits = rd_u16(b, body + 14);
|
|
34
|
+
fmt = Some((audio_format, channels, bits));
|
|
35
|
+
} else if id == b"data" {
|
|
36
|
+
let end = (body + sz).min(b.len());
|
|
37
|
+
data = Some((body, end));
|
|
38
|
+
}
|
|
39
|
+
pos = body + sz + (sz & 1); // chunks are word-aligned
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
let (audio_format, channels, bits) =
|
|
43
|
+
fmt.ok_or_else(|| CodecError::Decode("missing fmt chunk".into()))?;
|
|
44
|
+
let (ds, de) = data.ok_or_else(|| CodecError::Decode("missing data chunk".into()))?;
|
|
45
|
+
let channels = channels.max(1) as usize;
|
|
46
|
+
let raw = &b[ds..de];
|
|
47
|
+
|
|
48
|
+
// decode interleaved samples to f32
|
|
49
|
+
let interleaved: Vec<f32> = match (audio_format, bits) {
|
|
50
|
+
(1, 16) => raw
|
|
51
|
+
.chunks_exact(2)
|
|
52
|
+
.map(|c| i16::from_le_bytes([c[0], c[1]]) as f32 / 32768.0)
|
|
53
|
+
.collect(),
|
|
54
|
+
(1, 8) => raw.iter().map(|&x| (x as f32 - 128.0) / 128.0).collect(),
|
|
55
|
+
(3, 32) => raw
|
|
56
|
+
.chunks_exact(4)
|
|
57
|
+
.map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
|
|
58
|
+
.collect(),
|
|
59
|
+
(af, bps) => {
|
|
60
|
+
return Err(CodecError::Unsupported(format!(
|
|
61
|
+
"WAV format={af} bits={bps} (use Symphonia for compressed audio)"
|
|
62
|
+
)))
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
let frames = interleaved.len() / channels;
|
|
67
|
+
// deinterleave -> channel-major [channels, frames]
|
|
68
|
+
let mut out = vec![0f32; channels * frames];
|
|
69
|
+
for f in 0..frames {
|
|
70
|
+
for c in 0..channels {
|
|
71
|
+
out[c * frames + f] = interleaved[f * channels + c];
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
Ok(Tensor {
|
|
75
|
+
shape: vec![channels, frames],
|
|
76
|
+
data: TensorData::F32(out),
|
|
77
|
+
})
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a 16-bit PCM WAV (16 kHz) from per-channel `i16` sample vectors.
    fn make_wav(channels: u16, samples_per_ch: &[Vec<i16>]) -> Vec<u8> {
        let frame_count = samples_per_ch[0].len();
        let channel_count = channels as usize;

        // Interleave: every channel's sample for frame 0, then frame 1, ...
        let mut payload = Vec::with_capacity(frame_count * channel_count * 2);
        for frame in 0..frame_count {
            for lane in samples_per_ch.iter().take(channel_count) {
                payload.extend_from_slice(&lane[frame].to_le_bytes());
            }
        }

        let payload_len = payload.len() as u32;
        let byte_rate = 16000 * channels as u32 * 2;
        let block_align = channels * 2;

        let mut wav = Vec::with_capacity(44 + payload.len());
        // RIFF container header
        wav.extend_from_slice(b"RIFF");
        wav.extend_from_slice(&(36 + payload_len).to_le_bytes());
        wav.extend_from_slice(b"WAVE");
        // "fmt " chunk: PCM, `channels` channels, 16 kHz, 16 bits per sample
        wav.extend_from_slice(b"fmt ");
        wav.extend_from_slice(&16u32.to_le_bytes());
        wav.extend_from_slice(&1u16.to_le_bytes());
        wav.extend_from_slice(&channels.to_le_bytes());
        wav.extend_from_slice(&16000u32.to_le_bytes());
        wav.extend_from_slice(&byte_rate.to_le_bytes());
        wav.extend_from_slice(&block_align.to_le_bytes());
        wav.extend_from_slice(&16u16.to_le_bytes());
        // "data" chunk
        wav.extend_from_slice(b"data");
        wav.extend_from_slice(&payload_len.to_le_bytes());
        wav.extend_from_slice(&payload);
        wav
    }

    #[test]
    fn decode_stereo_pcm16() {
        // ch0 = [0, 16384], ch1 = [-32768, 32767]
        let left = vec![0, 16384];
        let right = vec![-32768, 32767];
        let wav = make_wav(2, &[left, right]);

        let t = WavCodec.decode(&wav).unwrap();
        assert_eq!(t.shape, vec![2, 2]); // [channels, frames]
        assert!(t.check());

        let samples = match &t.data {
            TensorData::F32(values) => values,
            _ => panic!("expected F32"),
        };
        // channel-major layout: [ch0_f0, ch0_f1, ch1_f0, ch1_f1]
        let expected: [(f32, f32); 4] = [(0.0, 1e-6), (0.5, 1e-6), (-1.0, 1e-6), (0.9999, 1e-3)];
        for (slot, &(want, tolerance)) in expected.iter().enumerate() {
            assert!((samples[slot] - want).abs() < tolerance);
        }
    }

    #[test]
    fn rejects_non_wav() {
        let outcome = WavCodec.decode(b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
        assert!(outcome.is_err());
    }
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
//! Pure-Rust image decoder (PNG/JPEG) producing an `[H, W, 3]` U8 tensor.
|
|
2
|
+
|
|
3
|
+
use crate::{Codec, CodecError, Result, Tensor, TensorData};
|
|
4
|
+
|
|
5
|
+
pub struct ImageCodec;
|
|
6
|
+
|
|
7
|
+
impl ImageCodec {
|
|
8
|
+
/// Decode and resize to exactly `(h, w)` (RGB). Used for collation into a
|
|
9
|
+
/// uniform `[B,H,W,3]` batch (resize-on-decode avoids full-res then shrink).
|
|
10
|
+
pub fn decode_resized(&self, bytes: &[u8], h: usize, w: usize) -> Result<Tensor> {
|
|
11
|
+
let img = image::load_from_memory(bytes)
|
|
12
|
+
.map_err(|e| CodecError::Decode(format!("image: {e}")))?;
|
|
13
|
+
let img = img.resize_exact(w as u32, h as u32, image::imageops::FilterType::Triangle);
|
|
14
|
+
let data = img.to_rgb8().into_raw();
|
|
15
|
+
Ok(Tensor {
|
|
16
|
+
shape: vec![h, w, 3],
|
|
17
|
+
data: TensorData::U8(data),
|
|
18
|
+
})
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
impl Codec for ImageCodec {
|
|
23
|
+
fn decode(&self, bytes: &[u8]) -> Result<Tensor> {
|
|
24
|
+
let img = image::load_from_memory(bytes)
|
|
25
|
+
.map_err(|e| CodecError::Decode(format!("image: {e}")))?
|
|
26
|
+
.to_rgb8();
|
|
27
|
+
let (w, h) = img.dimensions();
|
|
28
|
+
let data = img.into_raw(); // row-major RGB, len = h*w*3
|
|
29
|
+
Ok(Tensor {
|
|
30
|
+
shape: vec![h as usize, w as usize, 3],
|
|
31
|
+
data: TensorData::U8(data),
|
|
32
|
+
})
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode a `w` x `h` RGB PNG in memory whose pixel at `(x, y)` is `[x, y, 7]`.
    fn make_png(w: u32, h: u32) -> Vec<u8> {
        let canvas = image::RgbImage::from_fn(w, h, |x, y| image::Rgb([x as u8, y as u8, 7]));
        let mut sink = std::io::Cursor::new(Vec::new());
        image::DynamicImage::ImageRgb8(canvas)
            .write_to(&mut sink, image::ImageFormat::Png)
            .unwrap();
        sink.into_inner()
    }

    #[test]
    fn decode_png_shape_and_pixels() {
        let encoded = make_png(4, 3);
        let t = ImageCodec.decode(&encoded).unwrap();
        assert_eq!(t.shape, vec![3, 4, 3]); // [H, W, C]
        assert!(t.check());

        let pixels = match &t.data {
            TensorData::U8(bytes) => bytes,
            _ => panic!("expected U8"),
        };
        // Pixel (x=2, y=1) was written as R=2, G=1, B=7; its byte offset in
        // the row-major buffer is (y * W + x) * 3.
        let (x, y, width) = (2usize, 1usize, 4usize);
        let off = (y * width + x) * 3;
        assert_eq!(&pixels[off..off + 3], &[2, 1, 7]);
    }

    #[test]
    fn bad_bytes_error() {
        let outcome = ImageCodec.decode(b"not an image");
        assert!(outcome.is_err());
    }

    #[test]
    fn decode_resized_to_fixed_shape() {
        let encoded = make_png(10, 7);
        let t = ImageCodec.decode_resized(&encoded, 4, 4).unwrap();
        // Output is [H, W, C] at the requested size regardless of source size.
        assert_eq!(t.shape, vec![4, 4, 3]);
        assert!(t.check());
    }
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
//! # ferroload-codec
|
|
2
|
+
//!
|
|
3
|
+
//! Per-modality decoders behind a single [`Codec`] trait. A decoder turns raw
|
|
4
|
+
//! member bytes (as stored in a shard) into a [`Tensor`]. Backends are
|
|
5
|
+
//! feature-gated:
|
|
6
|
+
//!
|
|
7
|
+
//! - `image-codec` (default) — pure-Rust PNG/JPEG via the `image` crate.
|
|
8
|
+
//! - `audio-codec` (default) — pure-Rust WAV/PCM decoder (no deps).
|
|
9
|
+
//! - `video-ffmpeg` / `video-nvdec` (opt-in) — libav/NVDEC; require system
|
|
10
|
+
//! ffmpeg + clang, so they are not built in constrained environments.
|
|
11
|
+
//!
|
|
12
|
+
//! Unknown modalities with no registered codec can always fall back to raw bytes.
|
|
13
|
+
|
|
14
|
+
use std::collections::BTreeMap;
|
|
15
|
+
|
|
16
|
+
mod tensor;
|
|
17
|
+
pub use tensor::{Dtype, Tensor, TensorData};
|
|
18
|
+
|
|
19
|
+
#[cfg(feature = "image-codec")]
|
|
20
|
+
pub mod image_codec;
|
|
21
|
+
#[cfg(feature = "audio-codec")]
|
|
22
|
+
pub mod audio_wav;
|
|
23
|
+
pub mod sampling; // temporal frame-index selection (pure, always available)
|
|
24
|
+
#[cfg(feature = "video-ffmpeg")]
|
|
25
|
+
pub mod video;
|
|
26
|
+
|
|
27
|
+
/// Failure modes reported by codecs.
#[derive(Debug)]
pub enum CodecError {
    /// The input bytes could not be decoded; the string carries the detail.
    Decode(String),
    /// The input uses a format or feature this build does not handle.
    Unsupported(String),
}

impl std::fmt::Display for CodecError {
    /// Render as `decode error: <detail>` or `unsupported: <detail>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use CodecError::{Decode, Unsupported};
        match self {
            Decode(reason) => f.write_fmt(format_args!("decode error: {reason}")),
            Unsupported(what) => f.write_fmt(format_args!("unsupported: {what}")),
        }
    }
}

impl std::error::Error for CodecError {}
|
|
41
|
+
pub type Result<T> = std::result::Result<T, CodecError>;
|
|
42
|
+
|
|
43
|
+
/// Decode raw member bytes into a tensor.
|
|
44
|
+
pub trait Codec: Send + Sync {
|
|
45
|
+
fn decode(&self, bytes: &[u8]) -> Result<Tensor>;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/// Passthrough "codec" that returns the raw bytes as a 1-D U8 tensor — the
|
|
49
|
+
/// fallback for modalities without a registered decoder.
|
|
50
|
+
pub struct RawCodec;
|
|
51
|
+
impl Codec for RawCodec {
|
|
52
|
+
fn decode(&self, bytes: &[u8]) -> Result<Tensor> {
|
|
53
|
+
Ok(Tensor {
|
|
54
|
+
shape: vec![bytes.len()],
|
|
55
|
+
data: TensorData::U8(bytes.to_vec()),
|
|
56
|
+
})
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/// A registry mapping codec name -> implementation. Custom codecs (e.g. depth,
|
|
61
|
+
/// hyperspectral) can be inserted at runtime.
|
|
62
|
+
#[derive(Default)]
|
|
63
|
+
pub struct Registry {
|
|
64
|
+
codecs: BTreeMap<String, Box<dyn Codec>>,
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
impl Registry {
|
|
68
|
+
pub fn new() -> Self {
|
|
69
|
+
Registry { codecs: BTreeMap::new() }
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
pub fn register(&mut self, name: &str, codec: Box<dyn Codec>) {
|
|
73
|
+
self.codecs.insert(name.to_string(), codec);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
pub fn get(&self, name: &str) -> Option<&dyn Codec> {
|
|
77
|
+
self.codecs.get(name).map(|b| b.as_ref())
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/// Decode with the named codec, falling back to raw bytes if unregistered.
|
|
81
|
+
pub fn decode_or_raw(&self, name: &str, bytes: &[u8]) -> Result<Tensor> {
|
|
82
|
+
match self.get(name) {
|
|
83
|
+
Some(c) => c.decode(bytes),
|
|
84
|
+
None => RawCodec.decode(bytes),
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/// Registry preloaded with the compiled-in default codecs.
|
|
89
|
+
pub fn with_defaults() -> Self {
|
|
90
|
+
let mut r = Registry::new();
|
|
91
|
+
#[cfg(feature = "image-codec")]
|
|
92
|
+
r.register("image", Box::new(image_codec::ImageCodec));
|
|
93
|
+
#[cfg(feature = "audio-codec")]
|
|
94
|
+
r.register("audio", Box::new(audio_wav::WavCodec));
|
|
95
|
+
r.register("raw", Box::new(RawCodec));
|
|
96
|
+
r
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// A modality name with no registered codec must come back as raw bytes.
    #[test]
    fn raw_fallback_for_unknown_modality() {
        let registry = Registry::with_defaults();
        let payload: &[u8] = b"\x01\x02\x03";

        let tensor = registry.decode_or_raw("hyperspectral", payload).unwrap();

        assert_eq!(tensor.shape, vec![3]);
        let is_u8 = matches!(tensor.data, TensorData::U8(_));
        assert!(is_u8);
    }
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
//! Temporal frame-index selection for video (DESIGN §14.5: subsample *which*
|
|
2
|
+
//! frames to keep so we never decode-all-then-drop). Pure logic, fully tested;
|
|
3
|
+
//! used by the feature-gated ffmpeg decoder to choose frames before decoding.
|
|
4
|
+
|
|
5
|
+
/// Strategy for picking which frames of a clip to keep.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Sampling {
    /// Frames spread evenly over the whole clip, first and last included.
    Uniform,
    /// The leading frames of the clip, with no gaps.
    Dense,
}

/// Pick `num_frames` frame indices from a clip of `total` frames.
///
/// The result is ascending, every index lies in `[0, total)`, and the length
/// is `min(num_frames, total)`; it is empty when either argument is zero.
/// A single `Uniform` pick is the middle frame (`total / 2`).
pub fn frame_indices(total: usize, num_frames: usize, sampling: Sampling) -> Vec<usize> {
    // `keep` is zero exactly when `total` or `num_frames` is zero.
    let keep = num_frames.min(total);
    if keep == 0 {
        return Vec::new();
    }

    match sampling {
        Sampling::Dense => return (0..keep).collect(),
        Sampling::Uniform => {}
    }

    if keep == 1 {
        return vec![total / 2];
    }

    // Spread `keep` picks over [0, total - 1] with both endpoints included:
    // pick i sits at round(i * (total - 1) / (keep - 1)).
    let last = total - 1;
    let gaps = (keep - 1) as f64;
    let mut picks = Vec::with_capacity(keep);
    for slot in 0..keep {
        let exact = (slot * last) as f64 / gaps;
        picks.push(exact.round() as usize);
    }
    picks
}
|
|
33
|
+
|
|
34
|
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn uniform_spans_endpoints() {
        let spread = frame_indices(10, 4, Sampling::Uniform);
        assert_eq!(spread, vec![0, 3, 6, 9]);

        // A single uniform pick lands on the middle frame.
        let single = frame_indices(100, 1, Sampling::Uniform);
        assert_eq!(single, vec![50]);
    }

    #[test]
    fn dense_is_prefix() {
        let head = frame_indices(10, 4, Sampling::Dense);
        assert_eq!(head, vec![0, 1, 2, 3]);
    }

    #[test]
    fn clamps_when_fewer_frames_than_requested() {
        let short_clip = frame_indices(3, 8, Sampling::Uniform);
        assert_eq!(short_clip, vec![0, 1, 2]);

        let empty_clip = frame_indices(0, 8, Sampling::Uniform);
        assert_eq!(empty_clip, Vec::<usize>::new());
    }

    #[test]
    fn indices_in_range_and_sorted() {
        let totals = [1usize, 5, 16, 257];
        let requests = [1usize, 4, 16];
        for &total in &totals {
            for &nf in &requests {
                let idx = frame_indices(total, nf, Sampling::Uniform);
                for &i in &idx {
                    assert!(i < total);
                }
                for pair in idx.windows(2) {
                    assert!(pair[0] <= pair[1]);
                }
            }
        }
    }
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
//! Minimal owned tensor returned by decoders. The PyO3 layer maps these to
|
|
2
|
+
//! torch tensors (zero-copy via DLPack) at the boundary.
|
|
3
|
+
|
|
4
|
+
/// Element type of a [`Tensor`]'s backing buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dtype {
    U8,
    F32,
}

/// Owned, flat element storage; the variant fixes the element type.
#[derive(Debug, Clone, PartialEq)]
pub enum TensorData {
    U8(Vec<u8>),
    F32(Vec<f32>),
}

/// A shape plus the flat buffer holding its elements.
///
/// Both fields are public, so nothing stops them from disagreeing; call
/// [`Tensor::check`] to validate a tensor before trusting its shape.
#[derive(Debug, Clone, PartialEq)]
pub struct Tensor {
    pub shape: Vec<usize>,
    pub data: TensorData,
}

impl Tensor {
    /// Element type of the backing buffer.
    pub fn dtype(&self) -> Dtype {
        match self.data {
            TensorData::U8(_) => Dtype::U8,
            TensorData::F32(_) => Dtype::F32,
        }
    }

    /// Number of elements implied by `shape` (1 for an empty shape).
    ///
    /// The product saturates at `usize::MAX` instead of overflowing, so a
    /// nonsensical shape can neither panic nor wrap around to a small count.
    pub fn numel(&self) -> usize {
        self.shape
            .iter()
            .fold(1usize, |count, &dim| count.saturating_mul(dim))
    }

    /// Validate that the element count matches the declared shape.
    ///
    /// A shape whose product overflows `usize` is always rejected: `numel`
    /// saturates to `usize::MAX`, which no real buffer length can equal.
    pub fn check(&self) -> bool {
        let stored = match &self.data {
            TensorData::U8(v) => v.len(),
            TensorData::F32(v) => v.len(),
        };
        stored == self.numel()
    }
}
|