humecodec 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- humecodec-0.1.0/PKG-INFO +315 -0
- humecodec-0.1.0/README.md +284 -0
- humecodec-0.1.0/pyproject.toml +80 -0
- humecodec-0.1.0/setup.cfg +4 -0
- humecodec-0.1.0/setup.py +277 -0
- humecodec-0.1.0/src/humecodec/__init__.py +230 -0
- humecodec-0.1.0/src/humecodec/_extension.py +15 -0
- humecodec-0.1.0/src/humecodec/csrc/cuda_utils.cpp +90 -0
- humecodec-0.1.0/src/humecodec/csrc/ffmpeg.cpp +152 -0
- humecodec-0.1.0/src/humecodec/csrc/filter_graph.cpp +255 -0
- humecodec-0.1.0/src/humecodec/csrc/hw_context.cpp +42 -0
- humecodec-0.1.0/src/humecodec/csrc/managed_buffer.cpp +267 -0
- humecodec-0.1.0/src/humecodec/csrc/pybind.cpp +567 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_reader/buffer/chunked_buffer.cpp +170 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_reader/buffer/unchunked_buffer.cpp +67 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_reader/conversion.cpp +600 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_reader/packet_buffer.cpp +24 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_reader/post_process.cpp +648 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_reader/stream_processor.cpp +339 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_reader/stream_reader.cpp +656 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_writer/encode_process.cpp +1002 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_writer/encoder.cpp +46 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_writer/packet_writer.cpp +36 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_writer/stream_writer.cpp +405 -0
- humecodec-0.1.0/src/humecodec/csrc/stream_writer/tensor_converter.cpp +394 -0
- humecodec-0.1.0/src/humecodec/decoder.py +545 -0
- humecodec-0.1.0/src/humecodec/encoder.py +255 -0
- humecodec-0.1.0/src/humecodec.egg-info/PKG-INFO +315 -0
- humecodec-0.1.0/src/humecodec.egg-info/SOURCES.txt +30 -0
- humecodec-0.1.0/src/humecodec.egg-info/dependency_links.txt +1 -0
- humecodec-0.1.0/src/humecodec.egg-info/requires.txt +1 -0
- humecodec-0.1.0/src/humecodec.egg-info/top_level.txt +1 -0
humecodec-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: humecodec
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: FFmpeg integration for PyTorch with bundled libraries
|
|
5
|
+
Author: humecodec contributors
|
|
6
|
+
License: BSD-3-Clause
|
|
7
|
+
Project-URL: Homepage, https://github.com/your-org/humecodec
|
|
8
|
+
Project-URL: Documentation, https://github.com/your-org/humecodec#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/your-org/humecodec
|
|
10
|
+
Project-URL: Issues, https://github.com/your-org/humecodec/issues
|
|
11
|
+
Keywords: audio,video,ffmpeg,pytorch,machine learning
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
16
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
17
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
18
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
26
|
+
Classifier: Topic :: Multimedia :: Video
|
|
27
|
+
Classifier: Topic :: Scientific/Engineering
|
|
28
|
+
Requires-Python: >=3.9
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
Requires-Dist: torch>=2.0
|
|
31
|
+
|
|
32
|
+
# humecodec
|
|
33
|
+
|
|
34
|
+
FFmpeg integration for PyTorch with bundled libraries. Load audio/video files directly into PyTorch tensors and save tensors back to audio/video files, without requiring a system FFmpeg installation.
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install humecodec
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
The package includes bundled FFmpeg libraries, so no separate FFmpeg installation is required.
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
### Load Audio
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import humecodec
|
|
50
|
+
|
|
51
|
+
# Load an audio file (returns tensor and sample rate)
|
|
52
|
+
waveform, sample_rate = humecodec.load_audio("audio.mp3")
|
|
53
|
+
print(f"Shape: {waveform.shape}") # (num_frames, num_channels)
|
|
54
|
+
print(f"Sample rate: {sample_rate}")
|
|
55
|
+
|
|
56
|
+
# Load with resampling
|
|
57
|
+
waveform, sr = humecodec.load_audio("audio.mp3", sample_rate=16000)
|
|
58
|
+
|
|
59
|
+
# Load a specific duration starting from an offset
|
|
60
|
+
waveform, sr = humecodec.load_audio("audio.mp3", offset=1.0, duration=5.0)
|
|
61
|
+
|
|
62
|
+
# Load as mono
|
|
63
|
+
waveform, sr = humecodec.load_audio("audio.mp3", num_channels=1)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Save Audio
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import torch
|
|
70
|
+
import humecodec
|
|
71
|
+
|
|
72
|
+
# Create a simple sine wave
|
|
73
|
+
sample_rate = 44100
|
|
74
|
+
duration = 2.0
|
|
75
|
+
t = torch.linspace(0, duration, int(sample_rate * duration))
|
|
76
|
+
waveform = 0.5 * torch.sin(2 * torch.pi * 440 * t).unsqueeze(1) # 440 Hz tone
|
|
77
|
+
|
|
78
|
+
# Save as WAV
|
|
79
|
+
humecodec.save_audio("output.wav", waveform, sample_rate)
|
|
80
|
+
|
|
81
|
+
# Save as MP3
|
|
82
|
+
humecodec.save_audio("output.mp3", waveform, sample_rate)
|
|
83
|
+
|
|
84
|
+
# Save as FLAC with custom encoder options
|
|
85
|
+
humecodec.save_audio(
|
|
86
|
+
"output.flac",
|
|
87
|
+
waveform,
|
|
88
|
+
sample_rate,
|
|
89
|
+
encoder_option={"compression_level": "8"}
|
|
90
|
+
)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Get Audio Info
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
import humecodec
|
|
97
|
+
|
|
98
|
+
info = humecodec.info("audio.mp3")
|
|
99
|
+
print(f"Sample rate: {info.sample_rate}")
|
|
100
|
+
print(f"Channels: {info.num_channels}")
|
|
101
|
+
print(f"Duration: {info.num_frames / info.sample_rate:.2f}s")
|
|
102
|
+
print(f"Codec: {info.codec}")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Advanced Usage
|
|
106
|
+
|
|
107
|
+
### Streaming Decode
|
|
108
|
+
|
|
109
|
+
For large files or real-time processing, use the streaming API:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from humecodec import MediaDecoder
|
|
113
|
+
|
|
114
|
+
decoder = MediaDecoder("long_audio.wav")
|
|
115
|
+
decoder.add_audio_stream(
|
|
116
|
+
frames_per_chunk=4096, # Process 4096 frames at a time
|
|
117
|
+
buffer_chunk_size=3,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
for (chunk,) in decoder.stream():
|
|
121
|
+
if chunk is not None:
|
|
122
|
+
# Process chunk: shape (frames_per_chunk, num_channels)
|
|
123
|
+
process(chunk)
|
|
124
|
+
print(f"PTS: {chunk.pts:.2f}s")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Streaming Encode
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from humecodec import MediaEncoder
|
|
131
|
+
import torch
|
|
132
|
+
|
|
133
|
+
encoder = MediaEncoder("output.wav")
|
|
134
|
+
encoder.add_audio_stream(
|
|
135
|
+
sample_rate=44100,
|
|
136
|
+
num_channels=2,
|
|
137
|
+
format="flt", # 32-bit float input
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
with encoder.open():
|
|
141
|
+
# Write audio in chunks
|
|
142
|
+
for chunk in generate_audio_chunks():
|
|
143
|
+
encoder.write_audio_chunk(0, chunk)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Video Support
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from humecodec import MediaDecoder, MediaEncoder
|
|
150
|
+
|
|
151
|
+
# Decode video
|
|
152
|
+
decoder = MediaDecoder("video.mp4")
|
|
153
|
+
decoder.add_video_stream(
|
|
154
|
+
frames_per_chunk=1,
|
|
155
|
+
format="rgb24", # Output as RGB
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
for (frame,) in decoder.stream():
|
|
159
|
+
if frame is not None:
|
|
160
|
+
# frame shape: (1, 3, height, width)
|
|
161
|
+
print(f"Frame at {frame.pts:.2f}s")
|
|
162
|
+
|
|
163
|
+
# Encode video
|
|
164
|
+
encoder = MediaEncoder("output.mp4")
|
|
165
|
+
encoder.add_video_stream(
|
|
166
|
+
frame_rate=30.0,
|
|
167
|
+
width=1920,
|
|
168
|
+
height=1080,
|
|
169
|
+
format="rgb24",
|
|
170
|
+
encoder="libx264",
|
|
171
|
+
encoder_option={"crf": "23", "preset": "medium"},
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
with encoder.open():
|
|
175
|
+
for frame in frames:
|
|
176
|
+
# frame shape: (1, 3, height, width), dtype uint8
|
|
177
|
+
encoder.write_video_chunk(0, frame)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Custom Filter Graphs
|
|
181
|
+
|
|
182
|
+
Apply FFmpeg filters during decode:
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
from humecodec import MediaDecoder
|
|
186
|
+
|
|
187
|
+
decoder = MediaDecoder("audio.wav")
|
|
188
|
+
|
|
189
|
+
# Add audio stream with filter (resample + convert to mono)
|
|
190
|
+
decoder.add_audio_stream(
|
|
191
|
+
frames_per_chunk=-1, # Read all at once
|
|
192
|
+
buffer_chunk_size=-1,
|
|
193
|
+
filter_desc="aresample=16000,aformat=sample_fmts=fltp:channel_layouts=mono",
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
decoder.process_all_packets()
|
|
197
|
+
chunks = decoder.pop_chunks()
|
|
198
|
+
waveform = chunks[0] # Resampled mono audio
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Seeking
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from humecodec import MediaDecoder
|
|
205
|
+
|
|
206
|
+
decoder = MediaDecoder("audio.mp3")
|
|
207
|
+
decoder.add_audio_stream(frames_per_chunk=44100)
|
|
208
|
+
|
|
209
|
+
# Seek to 30 seconds
|
|
210
|
+
decoder.seek(30.0, mode="precise") # or "key" for keyframe-only
|
|
211
|
+
|
|
212
|
+
for (chunk,) in decoder.stream():
|
|
213
|
+
# Chunks start from ~30s
|
|
214
|
+
print(f"PTS: {chunk.pts}")
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## API Reference
|
|
218
|
+
|
|
219
|
+
### Convenience Functions
|
|
220
|
+
|
|
221
|
+
| Function | Description |
|
|
222
|
+
|----------|-------------|
|
|
223
|
+
| `load_audio(path, ...)` | Load audio file to tensor |
|
|
224
|
+
| `save_audio(path, waveform, sample_rate, ...)` | Save tensor to audio file |
|
|
225
|
+
| `info(path)` | Get audio file metadata |
|
|
226
|
+
|
|
227
|
+
### Classes
|
|
228
|
+
|
|
229
|
+
| Class | Description |
|
|
230
|
+
|-------|-------------|
|
|
231
|
+
| `MediaDecoder` | Streaming decoder for audio/video |
|
|
232
|
+
| `MediaEncoder` | Streaming encoder for audio/video |
|
|
233
|
+
| `CodecConfig` | Codec configuration (bit_rate, gop_size, etc.) |
|
|
234
|
+
|
|
235
|
+
### Query Functions
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
import humecodec
|
|
239
|
+
|
|
240
|
+
# List available codecs
|
|
241
|
+
humecodec.get_audio_decoders() # {'mp3': 'MP3 ...', 'aac': 'AAC ...', ...}
|
|
242
|
+
humecodec.get_audio_encoders()
|
|
243
|
+
humecodec.get_video_decoders()
|
|
244
|
+
humecodec.get_video_encoders()
|
|
245
|
+
|
|
246
|
+
# List available formats
|
|
247
|
+
humecodec.get_demuxers() # Input formats
|
|
248
|
+
humecodec.get_muxers() # Output formats
|
|
249
|
+
|
|
250
|
+
# Get FFmpeg library versions
|
|
251
|
+
humecodec.get_versions()
|
|
252
|
+
# {'libavcodec': (62, 11, 100), 'libavformat': (62, 3, 100), ...}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## Tensor Formats
|
|
256
|
+
|
|
257
|
+
### Audio
|
|
258
|
+
|
|
259
|
+
- **Shape**: `(num_frames, num_channels)`
|
|
260
|
+
- **dtype**: `torch.float32` (default), range `[-1.0, 1.0]`
|
|
261
|
+
- Stereo: `(N, 2)`, Mono: `(N, 1)`
|
|
262
|
+
|
|
263
|
+
### Video
|
|
264
|
+
|
|
265
|
+
- **Shape**: `(num_frames, channels, height, width)`
|
|
266
|
+
- **dtype**: `torch.uint8` for RGB/BGR, `torch.float32` for YUV
|
|
267
|
+
- RGB24: `(N, 3, H, W)`, values `[0, 255]`
|
|
268
|
+
|
|
269
|
+
## Supported Formats
|
|
270
|
+
|
|
271
|
+
The bundled FFmpeg includes support for common formats:
|
|
272
|
+
|
|
273
|
+
**Audio**: WAV, MP3, AAC, FLAC, OGG/Vorbis, Opus
|
|
274
|
+
**Video**: H.264, H.265/HEVC, VP8, VP9, AV1
|
|
275
|
+
**Containers**: MP4, MKV, WebM, AVI, MOV
|
|
276
|
+
|
|
277
|
+
## Building from Source
|
|
278
|
+
|
|
279
|
+
For development or custom FFmpeg builds:
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
git clone https://github.com/your-org/humecodec
|
|
283
|
+
cd humecodec
|
|
284
|
+
|
|
285
|
+
# Install with system FFmpeg
|
|
286
|
+
pip install -e .
|
|
287
|
+
|
|
288
|
+
# Or with custom FFmpeg location
|
|
289
|
+
HUMECODEC_FFMPEG_ROOT=/path/to/ffmpeg pip install -e .
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### Building Wheels Locally
|
|
293
|
+
|
|
294
|
+
To build manylinux wheels with bundled FFmpeg libraries using Docker:
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
# Install cibuildwheel
|
|
298
|
+
pip install cibuildwheel
|
|
299
|
+
|
|
300
|
+
# Build a wheel for a single Python version (cp310 in this example)
|
|
301
|
+
sudo CIBW_MANYLINUX_X86_64_IMAGE=quay.io/pypa/manylinux_2_28_x86_64 \
|
|
302
|
+
cibuildwheel --only cp310-manylinux_x86_64 --output-dir wheelhouse
|
|
303
|
+
|
|
304
|
+
# Build all Python versions
|
|
305
|
+
sudo CIBW_MANYLINUX_X86_64_IMAGE=quay.io/pypa/manylinux_2_28_x86_64 \
|
|
306
|
+
cibuildwheel --output-dir wheelhouse
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
The resulting wheel (~38 MB) includes all FFmpeg libraries and works without any system FFmpeg installation.
|
|
310
|
+
|
|
311
|
+
## License
|
|
312
|
+
|
|
313
|
+
BSD-3-Clause
|
|
314
|
+
|
|
315
|
+
This project bundles FFmpeg libraries, which are licensed under LGPL/GPL. See FFmpeg's license for details.
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
# humecodec
|
|
2
|
+
|
|
3
|
+
FFmpeg integration for PyTorch with bundled libraries. Load audio/video files directly into PyTorch tensors and save tensors back to audio/video files, without requiring a system FFmpeg installation.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install humecodec
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
The package includes bundled FFmpeg libraries, so no separate FFmpeg installation is required.
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
### Load Audio
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import humecodec
|
|
19
|
+
|
|
20
|
+
# Load an audio file (returns tensor and sample rate)
|
|
21
|
+
waveform, sample_rate = humecodec.load_audio("audio.mp3")
|
|
22
|
+
print(f"Shape: {waveform.shape}") # (num_frames, num_channels)
|
|
23
|
+
print(f"Sample rate: {sample_rate}")
|
|
24
|
+
|
|
25
|
+
# Load with resampling
|
|
26
|
+
waveform, sr = humecodec.load_audio("audio.mp3", sample_rate=16000)
|
|
27
|
+
|
|
28
|
+
# Load a specific duration starting from an offset
|
|
29
|
+
waveform, sr = humecodec.load_audio("audio.mp3", offset=1.0, duration=5.0)
|
|
30
|
+
|
|
31
|
+
# Load as mono
|
|
32
|
+
waveform, sr = humecodec.load_audio("audio.mp3", num_channels=1)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Save Audio
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import torch
|
|
39
|
+
import humecodec
|
|
40
|
+
|
|
41
|
+
# Create a simple sine wave
|
|
42
|
+
sample_rate = 44100
|
|
43
|
+
duration = 2.0
|
|
44
|
+
t = torch.linspace(0, duration, int(sample_rate * duration))
|
|
45
|
+
waveform = 0.5 * torch.sin(2 * torch.pi * 440 * t).unsqueeze(1) # 440 Hz tone
|
|
46
|
+
|
|
47
|
+
# Save as WAV
|
|
48
|
+
humecodec.save_audio("output.wav", waveform, sample_rate)
|
|
49
|
+
|
|
50
|
+
# Save as MP3
|
|
51
|
+
humecodec.save_audio("output.mp3", waveform, sample_rate)
|
|
52
|
+
|
|
53
|
+
# Save as FLAC with custom encoder options
|
|
54
|
+
humecodec.save_audio(
|
|
55
|
+
"output.flac",
|
|
56
|
+
waveform,
|
|
57
|
+
sample_rate,
|
|
58
|
+
encoder_option={"compression_level": "8"}
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Get Audio Info
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import humecodec
|
|
66
|
+
|
|
67
|
+
info = humecodec.info("audio.mp3")
|
|
68
|
+
print(f"Sample rate: {info.sample_rate}")
|
|
69
|
+
print(f"Channels: {info.num_channels}")
|
|
70
|
+
print(f"Duration: {info.num_frames / info.sample_rate:.2f}s")
|
|
71
|
+
print(f"Codec: {info.codec}")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Advanced Usage
|
|
75
|
+
|
|
76
|
+
### Streaming Decode
|
|
77
|
+
|
|
78
|
+
For large files or real-time processing, use the streaming API:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from humecodec import MediaDecoder
|
|
82
|
+
|
|
83
|
+
decoder = MediaDecoder("long_audio.wav")
|
|
84
|
+
decoder.add_audio_stream(
|
|
85
|
+
frames_per_chunk=4096, # Process 4096 frames at a time
|
|
86
|
+
buffer_chunk_size=3,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
for (chunk,) in decoder.stream():
|
|
90
|
+
if chunk is not None:
|
|
91
|
+
# Process chunk: shape (frames_per_chunk, num_channels)
|
|
92
|
+
process(chunk)
|
|
93
|
+
print(f"PTS: {chunk.pts:.2f}s")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Streaming Encode
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from humecodec import MediaEncoder
|
|
100
|
+
import torch
|
|
101
|
+
|
|
102
|
+
encoder = MediaEncoder("output.wav")
|
|
103
|
+
encoder.add_audio_stream(
|
|
104
|
+
sample_rate=44100,
|
|
105
|
+
num_channels=2,
|
|
106
|
+
format="flt", # 32-bit float input
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
with encoder.open():
|
|
110
|
+
# Write audio in chunks
|
|
111
|
+
for chunk in generate_audio_chunks():
|
|
112
|
+
encoder.write_audio_chunk(0, chunk)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Video Support
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from humecodec import MediaDecoder, MediaEncoder
|
|
119
|
+
|
|
120
|
+
# Decode video
|
|
121
|
+
decoder = MediaDecoder("video.mp4")
|
|
122
|
+
decoder.add_video_stream(
|
|
123
|
+
frames_per_chunk=1,
|
|
124
|
+
format="rgb24", # Output as RGB
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
for (frame,) in decoder.stream():
|
|
128
|
+
if frame is not None:
|
|
129
|
+
# frame shape: (1, 3, height, width)
|
|
130
|
+
print(f"Frame at {frame.pts:.2f}s")
|
|
131
|
+
|
|
132
|
+
# Encode video
|
|
133
|
+
encoder = MediaEncoder("output.mp4")
|
|
134
|
+
encoder.add_video_stream(
|
|
135
|
+
frame_rate=30.0,
|
|
136
|
+
width=1920,
|
|
137
|
+
height=1080,
|
|
138
|
+
format="rgb24",
|
|
139
|
+
encoder="libx264",
|
|
140
|
+
encoder_option={"crf": "23", "preset": "medium"},
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
with encoder.open():
|
|
144
|
+
for frame in frames:
|
|
145
|
+
# frame shape: (1, 3, height, width), dtype uint8
|
|
146
|
+
encoder.write_video_chunk(0, frame)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Custom Filter Graphs
|
|
150
|
+
|
|
151
|
+
Apply FFmpeg filters during decode:
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from humecodec import MediaDecoder
|
|
155
|
+
|
|
156
|
+
decoder = MediaDecoder("audio.wav")
|
|
157
|
+
|
|
158
|
+
# Add audio stream with filter (resample + convert to mono)
|
|
159
|
+
decoder.add_audio_stream(
|
|
160
|
+
frames_per_chunk=-1, # Read all at once
|
|
161
|
+
buffer_chunk_size=-1,
|
|
162
|
+
filter_desc="aresample=16000,aformat=sample_fmts=fltp:channel_layouts=mono",
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
decoder.process_all_packets()
|
|
166
|
+
chunks = decoder.pop_chunks()
|
|
167
|
+
waveform = chunks[0] # Resampled mono audio
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Seeking
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from humecodec import MediaDecoder
|
|
174
|
+
|
|
175
|
+
decoder = MediaDecoder("audio.mp3")
|
|
176
|
+
decoder.add_audio_stream(frames_per_chunk=44100)
|
|
177
|
+
|
|
178
|
+
# Seek to 30 seconds
|
|
179
|
+
decoder.seek(30.0, mode="precise") # or "key" for keyframe-only
|
|
180
|
+
|
|
181
|
+
for (chunk,) in decoder.stream():
|
|
182
|
+
# Chunks start from ~30s
|
|
183
|
+
print(f"PTS: {chunk.pts}")
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## API Reference
|
|
187
|
+
|
|
188
|
+
### Convenience Functions
|
|
189
|
+
|
|
190
|
+
| Function | Description |
|
|
191
|
+
|----------|-------------|
|
|
192
|
+
| `load_audio(path, ...)` | Load audio file to tensor |
|
|
193
|
+
| `save_audio(path, waveform, sample_rate, ...)` | Save tensor to audio file |
|
|
194
|
+
| `info(path)` | Get audio file metadata |
|
|
195
|
+
|
|
196
|
+
### Classes
|
|
197
|
+
|
|
198
|
+
| Class | Description |
|
|
199
|
+
|-------|-------------|
|
|
200
|
+
| `MediaDecoder` | Streaming decoder for audio/video |
|
|
201
|
+
| `MediaEncoder` | Streaming encoder for audio/video |
|
|
202
|
+
| `CodecConfig` | Codec configuration (bit_rate, gop_size, etc.) |
|
|
203
|
+
|
|
204
|
+
### Query Functions
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
import humecodec
|
|
208
|
+
|
|
209
|
+
# List available codecs
|
|
210
|
+
humecodec.get_audio_decoders() # {'mp3': 'MP3 ...', 'aac': 'AAC ...', ...}
|
|
211
|
+
humecodec.get_audio_encoders()
|
|
212
|
+
humecodec.get_video_decoders()
|
|
213
|
+
humecodec.get_video_encoders()
|
|
214
|
+
|
|
215
|
+
# List available formats
|
|
216
|
+
humecodec.get_demuxers() # Input formats
|
|
217
|
+
humecodec.get_muxers() # Output formats
|
|
218
|
+
|
|
219
|
+
# Get FFmpeg library versions
|
|
220
|
+
humecodec.get_versions()
|
|
221
|
+
# {'libavcodec': (62, 11, 100), 'libavformat': (62, 3, 100), ...}
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## Tensor Formats
|
|
225
|
+
|
|
226
|
+
### Audio
|
|
227
|
+
|
|
228
|
+
- **Shape**: `(num_frames, num_channels)`
|
|
229
|
+
- **dtype**: `torch.float32` (default), range `[-1.0, 1.0]`
|
|
230
|
+
- Stereo: `(N, 2)`, Mono: `(N, 1)`
|
|
231
|
+
|
|
232
|
+
### Video
|
|
233
|
+
|
|
234
|
+
- **Shape**: `(num_frames, channels, height, width)`
|
|
235
|
+
- **dtype**: `torch.uint8` for RGB/BGR, `torch.float32` for YUV
|
|
236
|
+
- RGB24: `(N, 3, H, W)`, values `[0, 255]`
|
|
237
|
+
|
|
238
|
+
## Supported Formats
|
|
239
|
+
|
|
240
|
+
The bundled FFmpeg includes support for common formats:
|
|
241
|
+
|
|
242
|
+
**Audio**: WAV, MP3, AAC, FLAC, OGG/Vorbis, Opus
|
|
243
|
+
**Video**: H.264, H.265/HEVC, VP8, VP9, AV1
|
|
244
|
+
**Containers**: MP4, MKV, WebM, AVI, MOV
|
|
245
|
+
|
|
246
|
+
## Building from Source
|
|
247
|
+
|
|
248
|
+
For development or custom FFmpeg builds:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
git clone https://github.com/your-org/humecodec
|
|
252
|
+
cd humecodec
|
|
253
|
+
|
|
254
|
+
# Install with system FFmpeg
|
|
255
|
+
pip install -e .
|
|
256
|
+
|
|
257
|
+
# Or with custom FFmpeg location
|
|
258
|
+
HUMECODEC_FFMPEG_ROOT=/path/to/ffmpeg pip install -e .
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Building Wheels Locally
|
|
262
|
+
|
|
263
|
+
To build manylinux wheels with bundled FFmpeg libraries using Docker:
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
# Install cibuildwheel
|
|
267
|
+
pip install cibuildwheel
|
|
268
|
+
|
|
269
|
+
# Build a wheel for a single Python version (cp310 in this example)
|
|
270
|
+
sudo CIBW_MANYLINUX_X86_64_IMAGE=quay.io/pypa/manylinux_2_28_x86_64 \
|
|
271
|
+
cibuildwheel --only cp310-manylinux_x86_64 --output-dir wheelhouse
|
|
272
|
+
|
|
273
|
+
# Build all Python versions
|
|
274
|
+
sudo CIBW_MANYLINUX_X86_64_IMAGE=quay.io/pypa/manylinux_2_28_x86_64 \
|
|
275
|
+
cibuildwheel --output-dir wheelhouse
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
The resulting wheel (~38 MB) includes all FFmpeg libraries and works without any system FFmpeg installation.
|
|
279
|
+
|
|
280
|
+
## License
|
|
281
|
+
|
|
282
|
+
BSD-3-Clause
|
|
283
|
+
|
|
284
|
+
This project bundles FFmpeg libraries, which are licensed under LGPL/GPL. See FFmpeg's license for details.
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "pybind11>=2.10"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "humecodec"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "FFmpeg integration for PyTorch with bundled libraries"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = {text = "BSD-3-Clause"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "humecodec contributors"}
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"License :: OSI Approved :: BSD License",
|
|
20
|
+
"Operating System :: MacOS :: MacOS X",
|
|
21
|
+
"Operating System :: Microsoft :: Windows",
|
|
22
|
+
"Operating System :: POSIX :: Linux",
|
|
23
|
+
"Programming Language :: Python :: 3",
|
|
24
|
+
"Programming Language :: Python :: 3.9",
|
|
25
|
+
"Programming Language :: Python :: 3.10",
|
|
26
|
+
"Programming Language :: Python :: 3.11",
|
|
27
|
+
"Programming Language :: Python :: 3.12",
|
|
28
|
+
"Programming Language :: Python :: 3.13",
|
|
29
|
+
"Topic :: Multimedia :: Sound/Audio",
|
|
30
|
+
"Topic :: Multimedia :: Video",
|
|
31
|
+
"Topic :: Scientific/Engineering",
|
|
32
|
+
]
|
|
33
|
+
keywords = ["audio", "video", "ffmpeg", "pytorch", "machine learning"]
|
|
34
|
+
dependencies = ["torch>=2.0"]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://github.com/your-org/humecodec"
|
|
38
|
+
Documentation = "https://github.com/your-org/humecodec#readme"
|
|
39
|
+
Repository = "https://github.com/your-org/humecodec"
|
|
40
|
+
Issues = "https://github.com/your-org/humecodec/issues"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
|
44
|
+
include = ["humecodec", "humecodec.*"]
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# cibuildwheel configuration
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
[tool.cibuildwheel]
|
|
51
|
+
# Build for Python 3.9+
|
|
52
|
+
build = "cp39-* cp310-* cp311-* cp312-* cp313-*"
|
|
53
|
+
|
|
54
|
+
# Skip musllinux and PyPy
|
|
55
|
+
skip = "*-musllinux_* pp*"
|
|
56
|
+
|
|
57
|
+
# Test command
|
|
58
|
+
test-command = "python -c \"import humecodec; print(humecodec.get_versions())\""
|
|
59
|
+
test-requires = ["pytest", "numpy"]
|
|
60
|
+
|
|
61
|
+
[tool.cibuildwheel.linux]
|
|
62
|
+
# Download FFmpeg before building
|
|
63
|
+
before-build = "pip install pybind11 && python scripts/fetch-ffmpeg.py /tmp/ffmpeg"
|
|
64
|
+
|
|
65
|
+
# Environment for finding FFmpeg
|
|
66
|
+
environment = { PKG_CONFIG_PATH = "/tmp/ffmpeg/lib/pkgconfig", LD_LIBRARY_PATH = "/tmp/ffmpeg/lib:$LD_LIBRARY_PATH", HUMECODEC_FFMPEG_ROOT = "/tmp/ffmpeg" }
|
|
67
|
+
|
|
68
|
+
# Repair wheel to bundle FFmpeg libraries (exclude PyTorch/CUDA - user provides these)
|
|
69
|
+
# Uses custom script that also patches RPATHs in bundled libraries
|
|
70
|
+
repair-wheel-command = "bash scripts/repair-wheel.sh {wheel} {dest_dir}"
|
|
71
|
+
|
|
72
|
+
[tool.cibuildwheel.macos]
|
|
73
|
+
before-build = "pip install pybind11 && python scripts/fetch-ffmpeg.py /tmp/ffmpeg"
|
|
74
|
+
environment = { PKG_CONFIG_PATH = "/tmp/ffmpeg/lib/pkgconfig", DYLD_LIBRARY_PATH = "/tmp/ffmpeg/lib:$DYLD_LIBRARY_PATH", HUMECODEC_FFMPEG_ROOT = "/tmp/ffmpeg", LDFLAGS = "-headerpad_max_install_names", MACOSX_DEPLOYMENT_TARGET = "14.0" }
|
|
75
|
+
repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}"
|
|
76
|
+
|
|
77
|
+
[tool.cibuildwheel.windows]
|
|
78
|
+
before-build = "pip install pybind11 delvewheel && python scripts\\fetch-ffmpeg.py C:\\ffmpeg"
|
|
79
|
+
environment = { INCLUDE = "C:\\ffmpeg\\include", LIB = "C:\\ffmpeg\\lib", HUMECODEC_FFMPEG_ROOT = "C:\\ffmpeg" }
|
|
80
|
+
repair-wheel-command = "delvewheel repair --add-path C:\\ffmpeg\\bin -w {dest_dir} {wheel}"
|