torchcodec 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +29 -0
  4. data/README.md +69 -0
  5. data/ext/torchcodec/AVIOContextHolder.cpp +60 -0
  6. data/ext/torchcodec/AVIOContextHolder.h +64 -0
  7. data/ext/torchcodec/AVIOTensorContext.cpp +130 -0
  8. data/ext/torchcodec/AVIOTensorContext.h +44 -0
  9. data/ext/torchcodec/CpuDeviceInterface.cpp +509 -0
  10. data/ext/torchcodec/CpuDeviceInterface.h +141 -0
  11. data/ext/torchcodec/DeviceInterface.cpp +117 -0
  12. data/ext/torchcodec/DeviceInterface.h +191 -0
  13. data/ext/torchcodec/Encoder.cpp +1054 -0
  14. data/ext/torchcodec/Encoder.h +192 -0
  15. data/ext/torchcodec/FFMPEGCommon.cpp +684 -0
  16. data/ext/torchcodec/FFMPEGCommon.h +314 -0
  17. data/ext/torchcodec/FilterGraph.cpp +159 -0
  18. data/ext/torchcodec/FilterGraph.h +59 -0
  19. data/ext/torchcodec/Frame.cpp +47 -0
  20. data/ext/torchcodec/Frame.h +72 -0
  21. data/ext/torchcodec/Metadata.cpp +124 -0
  22. data/ext/torchcodec/Metadata.h +92 -0
  23. data/ext/torchcodec/SingleStreamDecoder.cpp +1586 -0
  24. data/ext/torchcodec/SingleStreamDecoder.h +391 -0
  25. data/ext/torchcodec/StableABICompat.h +185 -0
  26. data/ext/torchcodec/StreamOptions.h +70 -0
  27. data/ext/torchcodec/Transform.cpp +128 -0
  28. data/ext/torchcodec/Transform.h +86 -0
  29. data/ext/torchcodec/ValidationUtils.cpp +35 -0
  30. data/ext/torchcodec/ValidationUtils.h +21 -0
  31. data/ext/torchcodec/custom_ops.cpp +913 -0
  32. data/ext/torchcodec/ext.cpp +12 -0
  33. data/ext/torchcodec/extconf.rb +73 -0
  34. data/lib/torchcodec/core/metadata.rb +41 -0
  35. data/lib/torchcodec/decoders/audio_decoder.rb +88 -0
  36. data/lib/torchcodec/decoders/decoder_utils.rb +11 -0
  37. data/lib/torchcodec/version.rb +3 -0
  38. data/lib/torchcodec.rb +28 -0
  39. metadata +90 -0
@@ -0,0 +1,128 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "Transform.h"
8
+ #include <torch/types.h>
9
+ #include "FFMPEGCommon.h"
10
+
11
+ namespace facebook::torchcodec {
12
+
13
+ namespace {
14
+
15
+ std::string toFilterGraphInterpolation(
16
+ ResizeTransform::InterpolationMode mode) {
17
+ switch (mode) {
18
+ case ResizeTransform::InterpolationMode::BILINEAR:
19
+ return "bilinear";
20
+ default:
21
+ TORCH_CHECK(
22
+ false,
23
+ "Unknown interpolation mode: " +
24
+ std::to_string(static_cast<int>(mode)));
25
+ }
26
+ }
27
+
28
+ int toSwsInterpolation(ResizeTransform::InterpolationMode mode) {
29
+ switch (mode) {
30
+ case ResizeTransform::InterpolationMode::BILINEAR:
31
+ return SWS_BILINEAR;
32
+ default:
33
+ TORCH_CHECK(
34
+ false,
35
+ "Unknown interpolation mode: " +
36
+ std::to_string(static_cast<int>(mode)));
37
+ }
38
+ }
39
+
40
+ } // namespace
41
+
42
+ std::string ResizeTransform::getFilterGraphCpu() const {
43
+ return "scale=" + std::to_string(outputDims_.width) + ":" +
44
+ std::to_string(outputDims_.height) +
45
+ ":flags=" + toFilterGraphInterpolation(interpolationMode_);
46
+ }
47
+
48
+ std::optional<FrameDims> ResizeTransform::getOutputFrameDims() const {
49
+ return outputDims_;
50
+ }
51
+
52
+ bool ResizeTransform::isResize() const {
53
+ return true;
54
+ }
55
+
56
+ int ResizeTransform::getSwsFlags() const {
57
+ return toSwsInterpolation(interpolationMode_);
58
+ }
59
+
60
+ CropTransform::CropTransform(const FrameDims& dims) : outputDims_(dims) {}
61
+
62
+ CropTransform::CropTransform(const FrameDims& dims, int x, int y)
63
+ : outputDims_(dims), x_(x), y_(y) {
64
+ TORCH_CHECK(x_ >= 0, "Crop x position must be >= 0, got: ", x_);
65
+ TORCH_CHECK(y_ >= 0, "Crop y position must be >= 0, got: ", y_);
66
+ }
67
+
68
+ std::string CropTransform::getFilterGraphCpu() const {
69
+ // For the FFmpeg filter crop, if the x and y coordinates are left
70
+ // unspecified, it defaults to a center crop.
71
+ std::string coordinates = x_.has_value()
72
+ ? (":" + std::to_string(x_.value()) + ":" + std::to_string(y_.value()))
73
+ : "";
74
+ return "crop=" + std::to_string(outputDims_.width) + ":" +
75
+ std::to_string(outputDims_.height) + coordinates + ":exact=1";
76
+ }
77
+
78
+ std::optional<FrameDims> CropTransform::getOutputFrameDims() const {
79
+ return outputDims_;
80
+ }
81
+
82
+ void CropTransform::validate(const FrameDims& inputDims) const {
83
+ TORCH_CHECK(
84
+ outputDims_.height <= inputDims.height,
85
+ "Crop output height (",
86
+ outputDims_.height,
87
+ ") is greater than input height (",
88
+ inputDims.height,
89
+ ")");
90
+ TORCH_CHECK(
91
+ outputDims_.width <= inputDims.width,
92
+ "Crop output width (",
93
+ outputDims_.width,
94
+ ") is greater than input width (",
95
+ inputDims.width,
96
+ ")");
97
+ TORCH_CHECK(
98
+ x_.has_value() == y_.has_value(),
99
+ "Crop x and y values must be both set or both unset");
100
+ if (x_.has_value()) {
101
+ TORCH_CHECK(
102
+ x_.value() <= inputDims.width,
103
+ "Crop x start position, ",
104
+ x_.value(),
105
+ ", out of bounds of input width, ",
106
+ inputDims.width);
107
+ TORCH_CHECK(
108
+ x_.value() + outputDims_.width <= inputDims.width,
109
+ "Crop x end position, ",
110
+ x_.value() + outputDims_.width,
111
+ ", out of bounds of input width ",
112
+ inputDims.width);
113
+ TORCH_CHECK(
114
+ y_.value() <= inputDims.height,
115
+ "Crop y start position, ",
116
+ y_.value(),
117
+ ", out of bounds of input height, ",
118
+ inputDims.height);
119
+ TORCH_CHECK(
120
+ y_.value() + outputDims_.height <= inputDims.height,
121
+ "Crop y end position, ",
122
+ y_.value() + outputDims_.height,
123
+ ", out of bounds of input height ",
124
+ inputDims.height);
125
+ }
126
+ }
127
+
128
+ } // namespace facebook::torchcodec
@@ -0,0 +1,86 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <optional>
10
+ #include <string>
11
+ #include "Frame.h"
12
+ #include "Metadata.h"
13
+
14
+ namespace facebook::torchcodec {
15
+
16
+ class Transform {
17
+ public:
18
+ virtual std::string getFilterGraphCpu() const = 0;
19
+ virtual ~Transform() = default;
20
+
21
+ // If the transformation does not change the output frame dimensions, then
22
+ // there is no need to override this member function. The default
23
+ // implementation returns an empty optional, indicating that the output frame
24
+ // has the same dimensions as the input frame.
25
+ //
26
+ // If the transformation does change the output frame dimensions, then it
27
+ // must override this member function and return the output frame dimensions.
28
+ virtual std::optional<FrameDims> getOutputFrameDims() const {
29
+ return std::nullopt;
30
+ }
31
+
32
+ // The ResizeTransform is special because it is the only transform
33
+ // that swscale can handle.
34
+ virtual bool isResize() const {
35
+ return false;
36
+ }
37
+
38
+ // The validity of some transforms depends on the characteristics of the
39
+ // AVStream they're being applied to. For example, some transforms will
40
+ // specify coordinates inside a frame, we need to validate that those are
41
+ // within the frame's bounds.
42
+ //
43
+ // Note that the validation function does not return anything. We expect
44
+ // invalid configurations to throw an exception.
45
+ virtual void validate([[maybe_unused]] const FrameDims& inputDims) const {}
46
+ };
47
+
48
+ class ResizeTransform : public Transform {
49
+ public:
50
+ enum class InterpolationMode { BILINEAR };
51
+
52
+ explicit ResizeTransform(const FrameDims& dims)
53
+ : outputDims_(dims), interpolationMode_(InterpolationMode::BILINEAR) {}
54
+
55
+ ResizeTransform(const FrameDims& dims, InterpolationMode interpolationMode)
56
+ : outputDims_(dims), interpolationMode_(interpolationMode) {}
57
+
58
+ std::string getFilterGraphCpu() const override;
59
+ std::optional<FrameDims> getOutputFrameDims() const override;
60
+ bool isResize() const override;
61
+
62
+ int getSwsFlags() const;
63
+
64
+ private:
65
+ FrameDims outputDims_;
66
+ InterpolationMode interpolationMode_;
67
+ };
68
+
69
+ class CropTransform : public Transform {
70
+ public:
71
+ CropTransform(const FrameDims& dims, int x, int y);
72
+
73
+ // Becomes a center crop if x and y are not specified.
74
+ explicit CropTransform(const FrameDims& dims);
75
+
76
+ std::string getFilterGraphCpu() const override;
77
+ std::optional<FrameDims> getOutputFrameDims() const override;
78
+ void validate(const FrameDims& inputDims) const override;
79
+
80
+ private:
81
+ FrameDims outputDims_;
82
+ std::optional<int> x_;
83
+ std::optional<int> y_;
84
+ };
85
+
86
+ } // namespace facebook::torchcodec
@@ -0,0 +1,35 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "ValidationUtils.h"
8
+ #include <limits>
9
+ #include "c10/util/Exception.h"
10
+
11
+ namespace facebook::torchcodec {
12
+
13
+ int validateInt64ToInt(int64_t value, const std::string& parameterName) {
14
+ TORCH_CHECK(
15
+ value >= std::numeric_limits<int>::min() &&
16
+ value <= std::numeric_limits<int>::max(),
17
+ parameterName,
18
+ "=",
19
+ value,
20
+ " is out of range for int type.");
21
+
22
+ return static_cast<int>(value);
23
+ }
24
+
25
+ std::optional<int> validateOptionalInt64ToInt(
26
+ const std::optional<int64_t>& value,
27
+ const std::string& parameterName) {
28
+ if (value.has_value()) {
29
+ return validateInt64ToInt(value.value(), parameterName);
30
+ } else {
31
+ return std::nullopt;
32
+ }
33
+ }
34
+
35
+ } // namespace facebook::torchcodec
@@ -0,0 +1,21 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <cstdint>
10
+ #include <optional>
11
+ #include <string>
12
+
13
+ namespace facebook::torchcodec {
14
+
15
+ int validateInt64ToInt(int64_t value, const std::string& parameterName);
16
+
17
+ std::optional<int> validateOptionalInt64ToInt(
18
+ const std::optional<int64_t>& value,
19
+ const std::string& parameterName);
20
+
21
+ } // namespace facebook::torchcodec