torchcodec 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +29 -0
  4. data/README.md +69 -0
  5. data/ext/torchcodec/AVIOContextHolder.cpp +60 -0
  6. data/ext/torchcodec/AVIOContextHolder.h +64 -0
  7. data/ext/torchcodec/AVIOTensorContext.cpp +130 -0
  8. data/ext/torchcodec/AVIOTensorContext.h +44 -0
  9. data/ext/torchcodec/CpuDeviceInterface.cpp +509 -0
  10. data/ext/torchcodec/CpuDeviceInterface.h +141 -0
  11. data/ext/torchcodec/DeviceInterface.cpp +117 -0
  12. data/ext/torchcodec/DeviceInterface.h +191 -0
  13. data/ext/torchcodec/Encoder.cpp +1054 -0
  14. data/ext/torchcodec/Encoder.h +192 -0
  15. data/ext/torchcodec/FFMPEGCommon.cpp +684 -0
  16. data/ext/torchcodec/FFMPEGCommon.h +314 -0
  17. data/ext/torchcodec/FilterGraph.cpp +159 -0
  18. data/ext/torchcodec/FilterGraph.h +59 -0
  19. data/ext/torchcodec/Frame.cpp +47 -0
  20. data/ext/torchcodec/Frame.h +72 -0
  21. data/ext/torchcodec/Metadata.cpp +124 -0
  22. data/ext/torchcodec/Metadata.h +92 -0
  23. data/ext/torchcodec/SingleStreamDecoder.cpp +1586 -0
  24. data/ext/torchcodec/SingleStreamDecoder.h +391 -0
  25. data/ext/torchcodec/StableABICompat.h +185 -0
  26. data/ext/torchcodec/StreamOptions.h +70 -0
  27. data/ext/torchcodec/Transform.cpp +128 -0
  28. data/ext/torchcodec/Transform.h +86 -0
  29. data/ext/torchcodec/ValidationUtils.cpp +35 -0
  30. data/ext/torchcodec/ValidationUtils.h +21 -0
  31. data/ext/torchcodec/custom_ops.cpp +913 -0
  32. data/ext/torchcodec/ext.cpp +12 -0
  33. data/ext/torchcodec/extconf.rb +73 -0
  34. data/lib/torchcodec/core/metadata.rb +41 -0
  35. data/lib/torchcodec/decoders/audio_decoder.rb +88 -0
  36. data/lib/torchcodec/decoders/decoder_utils.rb +11 -0
  37. data/lib/torchcodec/version.rb +3 -0
  38. data/lib/torchcodec.rb +28 -0
  39. metadata +90 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 292f24721c347356202f9f0c691dcb00bf4835208f7a9e8730964014731a3202
4
+ data.tar.gz: 9ae50e37cf15a1691aec3dddd07f46dc7a44a32f92ea0a90a99958341b4b5131
5
+ SHA512:
6
+ metadata.gz: a0d72c1d15a3f67dde0ee86807ad2c72e6bb6da541ec1ad9dfb2ddd1fca4ceb35d428bf279ef704b923a5f6faaccdb2aa6ec2b5b62542aec8d1a186e64788eba
7
+ data.tar.gz: 34d6865ce8da7c5a0d451633c9b8a4b4b42aafe55ad0c200e870b2e3809c83b3c872b40ba829d68fcacddaeac0de88fd206322248621fb0e077dac8aacc833e9
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0 (2026-02-26)
2
+
3
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright 2024 Meta
4
+ Copyright 2026 Andrew Kane
5
+
6
+ Redistribution and use in source and binary forms, with or without modification,
7
+ are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this list
10
+ of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice, this
13
+ list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its contributors may
17
+ be used to endorse or promote products derived from this software without specific
18
+ prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY
21
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
23
+ SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
25
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28
+ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29
+ DAMAGE.
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # TorchCodec Ruby
2
+
3
+ :fire: Media encoding and decoding for Torch.rb
4
+
5
+ [![Build Status](https://github.com/ankane/torchcodec-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/torchcodec-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ First, [install FFmpeg](#ffmpeg-installation). For Homebrew, use:
10
+
11
+ ```sh
12
+ brew install ffmpeg
13
+ ```
14
+
15
+ Add this line to your application’s Gemfile:
16
+
17
+ ```ruby
18
+ gem "torchcodec"
19
+ ```
20
+
21
+ ## Getting Started
22
+
23
+ This library follows the [Python API](https://meta-pytorch.org/torchcodec/). Most functionality is missing at the moment. PRs welcome!
24
+
25
+ ```ruby
26
+ decoder = TorchCodec::Decoders::AudioDecoder.new("file.mp3")
27
+ decoder.metadata
28
+ decoder.get_all_samples
29
+ ```
30
+
31
+ ## FFmpeg Installation
32
+
33
+ ### Linux
34
+
35
+ For Ubuntu, use:
36
+
37
+ ```sh
38
+ sudo apt install libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev
39
+ ```
40
+
41
+ ### Mac
42
+
43
+ ```sh
44
+ brew install ffmpeg
45
+ ```
46
+
47
+ ## History
48
+
49
+ View the [changelog](https://github.com/ankane/torchcodec-ruby/blob/master/CHANGELOG.md)
50
+
51
+ ## Contributing
52
+
53
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
54
+
55
+ - [Report bugs](https://github.com/ankane/torchcodec-ruby/issues)
56
+ - Fix bugs and [submit pull requests](https://github.com/ankane/torchcodec-ruby/pulls)
57
+ - Write, clarify, or fix documentation
58
+ - Suggest or add new features
59
+
60
+ To get started with development:
61
+
62
+ ```sh
63
+ git clone https://github.com/ankane/torchcodec-ruby.git
64
+ cd torchcodec-ruby
65
+ bundle install
66
+ bundle exec rake compile
67
+ bundle exec rake download:files
68
+ bundle exec rake test
69
+ ```
@@ -0,0 +1,60 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "AVIOContextHolder.h"
8
+ #include <torch/types.h>
9
+
10
+ namespace facebook::torchcodec {
11
+
12
+ void AVIOContextHolder::createAVIOContext(
13
+ AVIOReadFunction read,
14
+ AVIOWriteFunction write,
15
+ AVIOSeekFunction seek,
16
+ void* heldData,
17
+ bool isForWriting,
18
+ int bufferSize) {
19
+ TORCH_CHECK(
20
+ bufferSize > 0,
21
+ "Buffer size must be greater than 0; is " + std::to_string(bufferSize));
22
+ auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
23
+ TORCH_CHECK(
24
+ buffer != nullptr,
25
+ "Failed to allocate buffer of size " + std::to_string(bufferSize));
26
+
27
+ TORCH_CHECK(seek != nullptr, "seek method must be defined");
28
+
29
+ if (isForWriting) {
30
+ TORCH_CHECK(write != nullptr, "write method must be defined for writing");
31
+ } else {
32
+ TORCH_CHECK(read != nullptr, "read method must be defined for reading");
33
+ }
34
+
35
+ avioContext_.reset(avioAllocContext(
36
+ buffer,
37
+ bufferSize,
38
+ /*write_flag=*/isForWriting,
39
+ heldData,
40
+ read,
41
+ write,
42
+ seek));
43
+
44
+ if (!avioContext_) {
45
+ av_freep(&buffer);
46
+ TORCH_CHECK(false, "Failed to allocate AVIOContext");
47
+ }
48
+ }
49
+
50
+ AVIOContextHolder::~AVIOContextHolder() {
51
+ if (avioContext_) {
52
+ av_freep(&avioContext_->buffer);
53
+ }
54
+ }
55
+
56
+ AVIOContext* AVIOContextHolder::getAVIOContext() {
57
+ return avioContext_.get();
58
+ }
59
+
60
+ } // namespace facebook::torchcodec
@@ -0,0 +1,64 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include "FFMPEGCommon.h"
10
+
11
+ namespace facebook::torchcodec {
12
+
13
+ // The AVIOContextHolder serves several purposes:
14
+ //
15
+ // 1. It is a smart pointer for the AVIOContext. It has the logic to create
16
+ // a new AVIOContext and will appropriately free the AVIOContext when it
17
+ // goes out of scope. Note that this requires more than just having a
18
+ // UniqueAVIOContext, as the AVIOContext points to a buffer which must be
19
+ // freed.
20
+ // 2. It is a base class for AVIOContext specializations. When specializing a
21
+ // AVIOContext, we need to provide four things:
22
+ // 1. A read callback function, for decoding.
23
+ // 2. A seek callback function, for decoding and encoding.
24
+ // 3. A write callback function, for encoding.
25
+ // 4. A pointer to some context object that has the same lifetime as the
26
+ // AVIOContext itself. This context object holds the custom state that
27
+ // tracks the custom behavior of reading, seeking and writing. It is
28
+ // provided upon AVIOContext creation and to the read, seek and
29
+ // write callback functions.
30
+ // The callback functions do not need to be members of the derived class,
31
+ // but the derived class must have access to them. The context object must
32
+ // be a member of the derived class. Derived classes need to call
33
+ // createAVIOContext(), ideally in their constructor.
34
+ // 3. A generic handle for those that just need to manage having access to an
35
+ // AVIOContext, but aren't necessarily concerned with how it was customized:
36
+ // typically, the SingleStreamDecoder.
37
+ class AVIOContextHolder {
38
+ public:
39
+ virtual ~AVIOContextHolder();
40
+ AVIOContext* getAVIOContext();
41
+
42
+ protected:
43
+ // Make constructor protected to prevent anyone from constructing
44
+ // an AVIOContextHolder without deriving it. (Ordinarily this would be
45
+ // enforced by having a pure virtual methods, but we don't have any.)
46
+ AVIOContextHolder() = default;
47
+
48
+ // Deriving classes should call this function in their constructor.
49
+ void createAVIOContext(
50
+ AVIOReadFunction read,
51
+ AVIOWriteFunction write,
52
+ AVIOSeekFunction seek,
53
+ void* heldData,
54
+ bool isForWriting,
55
+ int bufferSize = defaultBufferSize);
56
+
57
+ private:
58
+ UniqueAVIOContext avioContext_;
59
+
60
+ // Defaults to 64 KB
61
+ static const int defaultBufferSize = 64 * 1024;
62
+ };
63
+
64
+ } // namespace facebook::torchcodec
@@ -0,0 +1,130 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "AVIOTensorContext.h"
8
+ #include <torch/types.h>
9
+
10
+ namespace facebook::torchcodec {
11
+
12
+ namespace {
13
+
14
// Number of bytes allocated up front for the encoded output tensor.
constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
// Largest size, in bytes, the encoded output tensor may be grown to.
constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
16
+
17
+ // The signature of this function is defined by FFMPEG.
18
+ int read(void* opaque, uint8_t* buf, int buf_size) {
19
+ auto tensorContext = static_cast<detail::TensorContext*>(opaque);
20
+ TORCH_CHECK(
21
+ tensorContext->current_pos <= tensorContext->data.numel(),
22
+ "Tried to read outside of the buffer: current_pos=",
23
+ tensorContext->current_pos,
24
+ ", size=",
25
+ tensorContext->data.numel());
26
+
27
+ int64_t numBytesRead = std::min(
28
+ static_cast<int64_t>(buf_size),
29
+ tensorContext->data.numel() - tensorContext->current_pos);
30
+
31
+ TORCH_CHECK(
32
+ numBytesRead >= 0,
33
+ "Tried to read negative bytes: numBytesRead=",
34
+ numBytesRead,
35
+ ", size=",
36
+ tensorContext->data.numel(),
37
+ ", current_pos=",
38
+ tensorContext->current_pos);
39
+
40
+ if (numBytesRead == 0) {
41
+ return AVERROR_EOF;
42
+ }
43
+
44
+ std::memcpy(
45
+ buf,
46
+ tensorContext->data.data_ptr<uint8_t>() + tensorContext->current_pos,
47
+ numBytesRead);
48
+ tensorContext->current_pos += numBytesRead;
49
+ return numBytesRead;
50
+ }
51
+
52
+ // The signature of this function is defined by FFMPEG.
53
+ int write(void* opaque, const uint8_t* buf, int buf_size) {
54
+ auto tensorContext = static_cast<detail::TensorContext*>(opaque);
55
+
56
+ int64_t bufSize = static_cast<int64_t>(buf_size);
57
+ if (tensorContext->current_pos + bufSize > tensorContext->data.numel()) {
58
+ TORCH_CHECK(
59
+ tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
60
+ "We tried to allocate an output encoded tensor larger than ",
61
+ MAX_TENSOR_SIZE,
62
+ " bytes. If you think this should be supported, please report.");
63
+
64
+ // We double the size of the outpout tensor. Calling cat() may not be the
65
+ // most efficient, but it's simple.
66
+ tensorContext->data =
67
+ torch::cat({tensorContext->data, tensorContext->data});
68
+ }
69
+
70
+ TORCH_CHECK(
71
+ tensorContext->current_pos + bufSize <= tensorContext->data.numel(),
72
+ "Re-allocation of the output tensor didn't work. ",
73
+ "This should not happen, please report on TorchCodec bug tracker");
74
+
75
+ uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
76
+ std::memcpy(outputTensorData + tensorContext->current_pos, buf, bufSize);
77
+ tensorContext->current_pos += bufSize;
78
+ // Track the maximum position written so getOutputTensor's narrow() does not
79
+ // truncate the file if final seek was backwards
80
+ tensorContext->max_pos =
81
+ std::max(tensorContext->current_pos, tensorContext->max_pos);
82
+ return buf_size;
83
+ }
84
+
85
+ // The signature of this function is defined by FFMPEG.
86
+ int64_t seek(void* opaque, int64_t offset, int whence) {
87
+ auto tensorContext = static_cast<detail::TensorContext*>(opaque);
88
+ int64_t ret = -1;
89
+
90
+ switch (whence) {
91
+ case AVSEEK_SIZE:
92
+ ret = tensorContext->data.numel();
93
+ break;
94
+ case SEEK_SET:
95
+ tensorContext->current_pos = offset;
96
+ ret = offset;
97
+ break;
98
+ default:
99
+ break;
100
+ }
101
+
102
+ return ret;
103
+ }
104
+
105
+ } // namespace
106
+
107
+ AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
108
+ : tensorContext_{data, 0, 0} {
109
+ TORCH_CHECK(data.numel() > 0, "data must not be empty");
110
+ TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
111
+ TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
112
+ createAVIOContext(
113
+ &read, nullptr, &seek, &tensorContext_, /*isForWriting=*/false);
114
+ }
115
+
116
+ AVIOToTensorContext::AVIOToTensorContext()
117
+ : tensorContext_{
118
+ torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}),
119
+ 0,
120
+ 0} {
121
+ createAVIOContext(
122
+ nullptr, &write, &seek, &tensorContext_, /*isForWriting=*/true);
123
+ }
124
+
125
+ torch::Tensor AVIOToTensorContext::getOutputTensor() {
126
+ return tensorContext_.data.narrow(
127
+ /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.max_pos);
128
+ }
129
+
130
+ } // namespace facebook::torchcodec
@@ -0,0 +1,44 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <torch/types.h>
10
+ #include "AVIOContextHolder.h"
11
+
12
+ namespace facebook::torchcodec {
13
+
14
+ namespace detail {
15
+
16
+ struct TensorContext {
17
+ torch::Tensor data;
18
+ int64_t current_pos;
19
+ int64_t max_pos;
20
+ };
21
+
22
+ } // namespace detail
23
+
24
+ // For Decoding: enables users to pass in the entire video or audio as bytes.
25
+ // Our read and seek functions then traverse the bytes in memory.
26
+ class AVIOFromTensorContext : public AVIOContextHolder {
27
+ public:
28
+ explicit AVIOFromTensorContext(torch::Tensor data);
29
+
30
+ private:
31
+ detail::TensorContext tensorContext_;
32
+ };
33
+
34
+ // For Encoding: used to encode into an output uint8 (bytes) tensor.
35
+ class AVIOToTensorContext : public AVIOContextHolder {
36
+ public:
37
+ explicit AVIOToTensorContext();
38
+ torch::Tensor getOutputTensor();
39
+
40
+ private:
41
+ detail::TensorContext tensorContext_;
42
+ };
43
+
44
+ } // namespace facebook::torchcodec