RubyGems - torchcodec - Versions diffs - 0.1.0 - Mend

torchcodec 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +3 -0
data/LICENSE.txt +29 -0
data/README.md +69 -0
data/ext/torchcodec/AVIOContextHolder.cpp +60 -0
data/ext/torchcodec/AVIOContextHolder.h +64 -0
data/ext/torchcodec/AVIOTensorContext.cpp +130 -0
data/ext/torchcodec/AVIOTensorContext.h +44 -0
data/ext/torchcodec/CpuDeviceInterface.cpp +509 -0
data/ext/torchcodec/CpuDeviceInterface.h +141 -0
data/ext/torchcodec/DeviceInterface.cpp +117 -0
data/ext/torchcodec/DeviceInterface.h +191 -0
data/ext/torchcodec/Encoder.cpp +1054 -0
data/ext/torchcodec/Encoder.h +192 -0
data/ext/torchcodec/FFMPEGCommon.cpp +684 -0
data/ext/torchcodec/FFMPEGCommon.h +314 -0
data/ext/torchcodec/FilterGraph.cpp +159 -0
data/ext/torchcodec/FilterGraph.h +59 -0
data/ext/torchcodec/Frame.cpp +47 -0
data/ext/torchcodec/Frame.h +72 -0
data/ext/torchcodec/Metadata.cpp +124 -0
data/ext/torchcodec/Metadata.h +92 -0
data/ext/torchcodec/SingleStreamDecoder.cpp +1586 -0
data/ext/torchcodec/SingleStreamDecoder.h +391 -0
data/ext/torchcodec/StableABICompat.h +185 -0
data/ext/torchcodec/StreamOptions.h +70 -0
data/ext/torchcodec/Transform.cpp +128 -0
data/ext/torchcodec/Transform.h +86 -0
data/ext/torchcodec/ValidationUtils.cpp +35 -0
data/ext/torchcodec/ValidationUtils.h +21 -0
data/ext/torchcodec/custom_ops.cpp +913 -0
data/ext/torchcodec/ext.cpp +12 -0
data/ext/torchcodec/extconf.rb +73 -0
data/lib/torchcodec/core/metadata.rb +41 -0
data/lib/torchcodec/decoders/audio_decoder.rb +88 -0
data/lib/torchcodec/decoders/decoder_utils.rb +11 -0
data/lib/torchcodec/version.rb +3 -0
data/lib/torchcodec.rb +28 -0
metadata +90 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 292f24721c347356202f9f0c691dcb00bf4835208f7a9e8730964014731a3202
+  data.tar.gz: 9ae50e37cf15a1691aec3dddd07f46dc7a44a32f92ea0a90a99958341b4b5131
+SHA512:
+  metadata.gz: a0d72c1d15a3f67dde0ee86807ad2c72e6bb6da541ec1ad9dfb2ddd1fca4ceb35d428bf279ef704b923a5f6faaccdb2aa6ec2b5b62542aec8d1a186e64788eba
+  data.tar.gz: 34d6865ce8da7c5a0d451633c9b8a4b4b42aafe55ad0c200e870b2e3809c83b3c872b40ba829d68fcacddaeac0de88fd206322248621fb0e077dac8aacc833e9

data/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,3 @@
+## 0.1.0 (2026-02-26)
+- First release

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,29 @@
+BSD 3-Clause License
+Copyright 2024 Meta
+Copyright 2026 Andrew Kane
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this list
+of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this
+list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may
+be used to endorse or promote products derived from this software without specific
+prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.

data/README.md ADDED Viewed

@@ -0,0 +1,69 @@
+# TorchCodec Ruby
+:fire: Media encoding and decoding for Torch.rb
+[![Build Status](https://github.com/ankane/torchcodec-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/torchcodec-ruby/actions)
+## Installation
+First, [install FFmpeg](#ffmpeg-installation). For Homebrew, use:
+```sh
+brew install ffmpeg
+```
+Add this line to your application’s Gemfile:
+```ruby
+gem "torchcodec"
+```
+## Getting Started
+This library follows the [Python API](https://meta-pytorch.org/torchcodec/). Most functionality is missing at the moment. PRs welcome!
+```ruby
+decoder = TorchCodec::Decoders::AudioDecoder.new("file.mp3")
+decoder.metadata
+decoder.get_all_samples
+```
+## FFmpeg Installation
+### Linux
+For Ubuntu, use:
+```sh
+sudo apt install libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev
+```
+### Mac
+```sh
+brew install ffmpeg
+```
+## History
+View the [changelog](https://github.com/ankane/torchcodec-ruby/blob/master/CHANGELOG.md)
+## Contributing
+Everyone is encouraged to help improve this project. Here are a few ways you can help:
+- [Report bugs](https://github.com/ankane/torchcodec-ruby/issues)
+- Fix bugs and [submit pull requests](https://github.com/ankane/torchcodec-ruby/pulls)
+- Write, clarify, or fix documentation
+- Suggest or add new features
+To get started with development:
+```sh
+git clone https://github.com/ankane/torchcodec-ruby.git
+cd torchcodec-ruby
+bundle install
+bundle exec rake compile
+bundle exec rake download:files
+bundle exec rake test
+```

data/ext/torchcodec/AVIOContextHolder.cpp ADDED Viewed

@@ -0,0 +1,60 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "AVIOContextHolder.h"
+#include <torch/types.h>
+namespace facebook::torchcodec {
+// Allocates the I/O buffer and the AVIOContext that this holder manages.
+//
+// Arguments:
+//   read, write, seek: callbacks forwarded to avioAllocContext(). seek is
+//     always required; write is required when isForWriting is true, read is
+//     required otherwise.
+//   heldData: opaque pointer forwarded to avioAllocContext() together with the
+//     callbacks.
+//   isForWriting: forwarded as the write_flag argument.
+//   bufferSize: size in bytes of the buffer to allocate; must be > 0.
+//
+// Every precondition and both allocations are guarded by TORCH_CHECK.
+void AVIOContextHolder::createAVIOContext(
+    AVIOReadFunction read,
+    AVIOWriteFunction write,
+    AVIOSeekFunction seek,
+    void* heldData,
+    bool isForWriting,
+    int bufferSize) {
+  TORCH_CHECK(
+      bufferSize > 0,
+      "Buffer size must be greater than 0; is " + std::to_string(bufferSize));
+
+  // Validate the callbacks before allocating anything: the buffer is a raw
+  // pointer with no owner until the AVIOContext exists, so a failed check after
+  // av_malloc() would leak it.
+  TORCH_CHECK(seek != nullptr, "seek method must be defined");
+  if (isForWriting) {
+    TORCH_CHECK(write != nullptr, "write method must be defined for writing");
+  } else {
+    TORCH_CHECK(read != nullptr, "read method must be defined for reading");
+  }
+
+  auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
+  TORCH_CHECK(
+      buffer != nullptr,
+      "Failed to allocate buffer of size " + std::to_string(bufferSize));
+
+  avioContext_.reset(avioAllocContext(
+      buffer,
+      bufferSize,
+      /*write_flag=*/isForWriting,
+      heldData,
+      read,
+      write,
+      seek));
+
+  if (!avioContext_) {
+    // No context took the buffer, so release it here before reporting.
+    av_freep(&buffer);
+    TORCH_CHECK(false, "Failed to allocate AVIOContext");
+  }
+}
+// Frees the buffer that the held AVIOContext points to. Nothing is done when
+// no context was ever created.
+AVIOContextHolder::~AVIOContextHolder() {
+  if (!avioContext_) {
+    return;
+  }
+  // The buffer pointer is read back from the context itself.
+  av_freep(&avioContext_->buffer);
+}
+// Returns the raw AVIOContext pointer obtained from avioContext_.get(). The
+// holder keeps avioContext_, so the caller receives a non-owning pointer.
+AVIOContext* AVIOContextHolder::getAVIOContext() {
+  return avioContext_.get();
+}
+} // namespace facebook::torchcodec

data/ext/torchcodec/AVIOContextHolder.h ADDED Viewed

@@ -0,0 +1,64 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include "FFMPEGCommon.h"
+namespace facebook::torchcodec {
+// The AVIOContextHolder serves several purposes:
+//
+//   1. It is a smart pointer for the AVIOContext. It has the logic to create
+//      a new AVIOContext and will appropriately free the AVIOContext when it
+//      goes out of scope. Note that this requires more than just having a
+//      UniqueAVIOContext, as the AVIOContext points to a buffer which must be
+//      freed.
+//   2. It is a base class for AVIOContext specializations. When specializing
+//      an AVIOContext, we need to provide four things:
+//        1. A read callback function, for decoding.
+//        2. A seek callback function, for decoding and encoding.
+//        3. A write callback function, for encoding.
+//        4. A pointer to some context object that has the same lifetime as the
+//           AVIOContext itself. This context object holds the custom state that
+//           tracks the custom behavior of reading, seeking and writing. It is
+//           provided upon AVIOContext creation and to the read, seek and
+//           write callback functions.
+//      The callback functions do not need to be members of the derived class,
+//      but the derived class must have access to them. The context object must
+//      be a member of the derived class. Derived classes need to call
+//      createAVIOContext(), ideally in their constructor.
+//   3. A generic handle for those that just need to manage having access to an
+//      AVIOContext, but aren't necessarily concerned with how it was
+//      customized: typically, the SingleStreamDecoder.
+class AVIOContextHolder {
+ public:
+  // Frees the buffer attached to the held AVIOContext, if one was created.
+  virtual ~AVIOContextHolder();
+  // Returns the held AVIOContext as a raw pointer.
+  AVIOContext* getAVIOContext();
+ protected:
+  // Make constructor protected to prevent anyone from constructing
+  // an AVIOContextHolder without deriving it. (Ordinarily this would be
+  // enforced by having pure virtual methods, but we don't have any.)
+  AVIOContextHolder() = default;
+  // Deriving classes should call this function in their constructor.
+  // seek must always be provided; write is required when isForWriting is
+  // true and read is required otherwise. heldData is the context object
+  // passed back to the callbacks.
+  void createAVIOContext(
+      AVIOReadFunction read,
+      AVIOWriteFunction write,
+      AVIOSeekFunction seek,
+      void* heldData,
+      bool isForWriting,
+      int bufferSize = defaultBufferSize);
+ private:
+  // The AVIOContext set up by createAVIOContext().
+  UniqueAVIOContext avioContext_;
+  // Defaults to 64 KB
+  static const int defaultBufferSize = 64 * 1024;
+};
+} // namespace facebook::torchcodec

data/ext/torchcodec/AVIOTensorContext.cpp ADDED Viewed

@@ -0,0 +1,130 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "AVIOTensorContext.h"
+#include <torch/types.h>
+namespace facebook::torchcodec {
+namespace {
+// Number of bytes in the output tensor that AVIOToTensorContext allocates up
+// front.
+constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
+// Upper bound, in bytes, that write() checks before doubling the output
+// tensor.
+constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
+// AVIO read callback; FFMPEG dictates the signature.
+//
+// Copies at most buf_size bytes from the tensor held by the TensorContext
+// (passed as opaque) into buf, starting at current_pos, then advances
+// current_pos. Returns the number of bytes copied, or AVERROR_EOF when no
+// bytes remain.
+int read(void* opaque, uint8_t* buf, int buf_size) {
+  auto* ctx = static_cast<detail::TensorContext*>(opaque);
+  const int64_t totalBytes = ctx->data.numel();
+
+  TORCH_CHECK(
+      ctx->current_pos <= totalBytes,
+      "Tried to read outside of the buffer: current_pos=",
+      ctx->current_pos,
+      ", size=",
+      totalBytes);
+
+  // Never hand out more than what is left after current_pos.
+  const int64_t bytesLeft = totalBytes - ctx->current_pos;
+  const int64_t numBytesRead =
+      std::min(static_cast<int64_t>(buf_size), bytesLeft);
+
+  TORCH_CHECK(
+      numBytesRead >= 0,
+      "Tried to read negative bytes: numBytesRead=",
+      numBytesRead,
+      ", size=",
+      totalBytes,
+      ", current_pos=",
+      ctx->current_pos);
+
+  if (numBytesRead == 0) {
+    return AVERROR_EOF;
+  }
+
+  const uint8_t* source = ctx->data.data_ptr<uint8_t>() + ctx->current_pos;
+  std::memcpy(buf, source, numBytesRead);
+  ctx->current_pos += numBytesRead;
+
+  // Bounded above by buf_size, so the value fits in an int.
+  return static_cast<int>(numBytesRead);
+}
+// AVIO write callback; FFMPEG dictates the signature.
+//
+// Copies buf_size bytes from buf into the tensor held by the TensorContext
+// (passed as opaque) at current_pos. The tensor is doubled once when the write
+// would not fit. Advances current_pos, updates max_pos, and returns buf_size.
+int write(void* opaque, const uint8_t* buf, int buf_size) {
+  auto* ctx = static_cast<detail::TensorContext*>(opaque);
+  const int64_t numBytes = static_cast<int64_t>(buf_size);
+  const int64_t endPos = ctx->current_pos + numBytes;
+
+  const bool needsGrowth = endPos > ctx->data.numel();
+  if (needsGrowth) {
+    TORCH_CHECK(
+        ctx->data.numel() * 2 <= MAX_TENSOR_SIZE,
+        "We tried to allocate an output encoded tensor larger than ",
+        MAX_TENSOR_SIZE,
+        " bytes. If you think this should be supported, please report.");
+    // Double the size of the output tensor by concatenating it with itself.
+    // cat() may not be the most efficient way to grow, but it is simple.
+    ctx->data = torch::cat({ctx->data, ctx->data});
+  }
+
+  TORCH_CHECK(
+      endPos <= ctx->data.numel(),
+      "Re-allocation of the output tensor didn't work. ",
+      "This should not happen, please report on TorchCodec bug tracker");
+
+  uint8_t* destination = ctx->data.data_ptr<uint8_t>() + ctx->current_pos;
+  std::memcpy(destination, buf, numBytes);
+  ctx->current_pos = endPos;
+
+  // Remember the furthest position ever written, so that getOutputTensor's
+  // narrow() does not truncate the output if the final seek went backwards.
+  if (ctx->max_pos < ctx->current_pos) {
+    ctx->max_pos = ctx->current_pos;
+  }
+  return buf_size;
+}
+// AVIO seek callback; FFMPEG dictates the signature.
+//
+// Supported values of whence:
+//   AVSEEK_SIZE: returns the number of elements in the tensor held by the
+//     TensorContext (passed as opaque); the position is left untouched.
+//   SEEK_SET: sets current_pos to offset and returns offset.
+// Any other whence, or a negative offset with SEEK_SET, returns -1 and leaves
+// current_pos unchanged.
+int64_t seek(void* opaque, int64_t offset, int whence) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  switch (whence) {
+    case AVSEEK_SIZE:
+      return tensorContext->data.numel();
+    case SEEK_SET:
+      // read() and write() add current_pos to the tensor's data pointer, and
+      // read() only validates the upper bound. A negative position must never
+      // be stored, otherwise the next memcpy would touch memory before the
+      // start of the tensor.
+      if (offset < 0) {
+        return -1;
+      }
+      tensorContext->current_pos = offset;
+      return offset;
+    default:
+      return -1;
+  }
+}
+} // namespace
+// Sets up decoding from an in-memory tensor. `data` is stored in
+// tensorContext_ with current_pos and max_pos both starting at 0. It must be a
+// non-empty, contiguous kUInt8 tensor.
+AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
+    : tensorContext_{data, 0, 0} {
+  TORCH_CHECK(data.numel() > 0, "data must not be empty");
+  TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
+  TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
+  // Decoding only: no write callback is supplied, and tensorContext_ is the
+  // opaque pointer the read and seek callbacks receive.
+  createAVIOContext(
+      &read, nullptr, &seek, &tensorContext_, /*isForWriting=*/false);
+}
+// Sets up encoding into an in-memory tensor. The output tensor is created with
+// torch::empty as INITIAL_TENSOR_SIZE kUInt8 elements, and current_pos and
+// max_pos both start at 0.
+AVIOToTensorContext::AVIOToTensorContext()
+    : tensorContext_{
+          torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}),
+          0,
+          0} {
+  // Encoding only: no read callback is supplied, and tensorContext_ is the
+  // opaque pointer the write and seek callbacks receive.
+  createAVIOContext(
+      nullptr, &write, &seek, &tensorContext_, /*isForWriting=*/true);
+}
+// Returns the first max_pos elements of the output tensor, i.e. everything up
+// to the furthest position that write() has recorded.
+torch::Tensor AVIOToTensorContext::getOutputTensor() {
+  const int64_t writtenLength = tensorContext_.max_pos;
+  return tensorContext_.data.narrow(
+      /*dim=*/0, /*start=*/0, /*length=*/writtenLength);
+}
+} // namespace facebook::torchcodec

data/ext/torchcodec/AVIOTensorContext.h ADDED Viewed

@@ -0,0 +1,44 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <torch/types.h>
+#include "AVIOContextHolder.h"
+namespace facebook::torchcodec {
+namespace detail {
+// State shared with the read, write and seek callbacks through their opaque
+// pointer.
+struct TensorContext {
+  // Bytes being read from (decoding) or written to (encoding).
+  torch::Tensor data;
+  // Offset into `data` where the next read or write starts; set by seek.
+  int64_t current_pos;
+  // Furthest offset reached by a write; getOutputTensor() narrows to it.
+  int64_t max_pos;
+};
+} // namespace detail
+// For Decoding: enables users to pass in the entire video or audio as bytes.
+// Our read and seek functions then traverse the bytes in memory.
+class AVIOFromTensorContext : public AVIOContextHolder {
+ public:
+  // `data` must be a non-empty, contiguous kUInt8 tensor.
+  explicit AVIOFromTensorContext(torch::Tensor data);
+ private:
+  // Context object handed to the read and seek callbacks.
+  detail::TensorContext tensorContext_;
+};
+// For Encoding: used to encode into an output uint8 (bytes) tensor.
+class AVIOToTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOToTensorContext();
+  // Returns the output tensor narrowed to the furthest position written.
+  torch::Tensor getOutputTensor();
+ private:
+  // Context object handed to the write and seek callbacks.
+  detail::TensorContext tensorContext_;
+};
+} // namespace facebook::torchcodec