torchcodec 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +29 -0
- data/README.md +69 -0
- data/ext/torchcodec/AVIOContextHolder.cpp +60 -0
- data/ext/torchcodec/AVIOContextHolder.h +64 -0
- data/ext/torchcodec/AVIOTensorContext.cpp +130 -0
- data/ext/torchcodec/AVIOTensorContext.h +44 -0
- data/ext/torchcodec/CpuDeviceInterface.cpp +509 -0
- data/ext/torchcodec/CpuDeviceInterface.h +141 -0
- data/ext/torchcodec/DeviceInterface.cpp +117 -0
- data/ext/torchcodec/DeviceInterface.h +191 -0
- data/ext/torchcodec/Encoder.cpp +1054 -0
- data/ext/torchcodec/Encoder.h +192 -0
- data/ext/torchcodec/FFMPEGCommon.cpp +684 -0
- data/ext/torchcodec/FFMPEGCommon.h +314 -0
- data/ext/torchcodec/FilterGraph.cpp +159 -0
- data/ext/torchcodec/FilterGraph.h +59 -0
- data/ext/torchcodec/Frame.cpp +47 -0
- data/ext/torchcodec/Frame.h +72 -0
- data/ext/torchcodec/Metadata.cpp +124 -0
- data/ext/torchcodec/Metadata.h +92 -0
- data/ext/torchcodec/SingleStreamDecoder.cpp +1586 -0
- data/ext/torchcodec/SingleStreamDecoder.h +391 -0
- data/ext/torchcodec/StableABICompat.h +185 -0
- data/ext/torchcodec/StreamOptions.h +70 -0
- data/ext/torchcodec/Transform.cpp +128 -0
- data/ext/torchcodec/Transform.h +86 -0
- data/ext/torchcodec/ValidationUtils.cpp +35 -0
- data/ext/torchcodec/ValidationUtils.h +21 -0
- data/ext/torchcodec/custom_ops.cpp +913 -0
- data/ext/torchcodec/ext.cpp +12 -0
- data/ext/torchcodec/extconf.rb +73 -0
- data/lib/torchcodec/core/metadata.rb +41 -0
- data/lib/torchcodec/decoders/audio_decoder.rb +88 -0
- data/lib/torchcodec/decoders/decoder_utils.rb +11 -0
- data/lib/torchcodec/version.rb +3 -0
- data/lib/torchcodec.rb +28 -0
- metadata +90 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "Transform.h"
|
|
8
|
+
#include <torch/types.h>
|
|
9
|
+
#include "FFMPEGCommon.h"
|
|
10
|
+
|
|
11
|
+
namespace facebook::torchcodec {
|
|
12
|
+
|
|
13
|
+
namespace {
|
|
14
|
+
|
|
15
|
+
// Translates a ResizeTransform interpolation mode into the name passed to the
// "flags" option of the scale filter string built by
// ResizeTransform::getFilterGraphCpu(). A mode without a known mapping fails
// a TORCH_CHECK.
std::string toFilterGraphInterpolation(
    ResizeTransform::InterpolationMode mode) {
  if (mode == ResizeTransform::InterpolationMode::BILINEAR) {
    return "bilinear";
  }
  // Only reached for enum values that have no mapping above.
  TORCH_CHECK(
      false,
      "Unknown interpolation mode: " +
          std::to_string(static_cast<int>(mode)));
}
|
|
27
|
+
|
|
28
|
+
// Translates a ResizeTransform interpolation mode into the corresponding
// SWS_* flag constant. A mode without a known mapping fails a TORCH_CHECK.
int toSwsInterpolation(ResizeTransform::InterpolationMode mode) {
  if (mode == ResizeTransform::InterpolationMode::BILINEAR) {
    return SWS_BILINEAR;
  }
  // Only reached for enum values that have no mapping above.
  TORCH_CHECK(
      false,
      "Unknown interpolation mode: " +
          std::to_string(static_cast<int>(mode)));
}
|
|
39
|
+
|
|
40
|
+
} // namespace
|
|
41
|
+
|
|
42
|
+
std::string ResizeTransform::getFilterGraphCpu() const {
|
|
43
|
+
return "scale=" + std::to_string(outputDims_.width) + ":" +
|
|
44
|
+
std::to_string(outputDims_.height) +
|
|
45
|
+
":flags=" + toFilterGraphInterpolation(interpolationMode_);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
std::optional<FrameDims> ResizeTransform::getOutputFrameDims() const {
|
|
49
|
+
return outputDims_;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Identifies this transform as a resize; overrides the base-class default,
// which answers false.
bool ResizeTransform::isResize() const {
  return true;
}
|
|
55
|
+
|
|
56
|
+
int ResizeTransform::getSwsFlags() const {
|
|
57
|
+
return toSwsInterpolation(interpolationMode_);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Crop to `dims` without an explicit position: x_ and y_ are left empty, so
// getFilterGraphCpu() emits no coordinates.
CropTransform::CropTransform(const FrameDims& dims)
    : outputDims_(dims), x_(std::nullopt), y_(std::nullopt) {}
|
|
61
|
+
|
|
62
|
+
// Crop to `dims` with the crop positioned at (x, y). Both coordinates must be
// non-negative; a negative value fails a TORCH_CHECK. Bounds against the
// actual input frame are checked later, in validate().
CropTransform::CropTransform(const FrameDims& dims, int x, int y)
    : outputDims_(dims), x_(x), y_(y) {
  // Horizontal position.
  TORCH_CHECK(x_ >= 0, "Crop x position must be >= 0, got: ", x_);

  // Vertical position.
  TORCH_CHECK(y_ >= 0, "Crop y position must be >= 0, got: ", y_);
}
|
|
67
|
+
|
|
68
|
+
std::string CropTransform::getFilterGraphCpu() const {
|
|
69
|
+
// For the FFmpeg filter crop, if the x and y coordinates are left
|
|
70
|
+
// unspecified, it defaults to a center crop.
|
|
71
|
+
std::string coordinates = x_.has_value()
|
|
72
|
+
? (":" + std::to_string(x_.value()) + ":" + std::to_string(y_.value()))
|
|
73
|
+
: "";
|
|
74
|
+
return "crop=" + std::to_string(outputDims_.width) + ":" +
|
|
75
|
+
std::to_string(outputDims_.height) + coordinates + ":exact=1";
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
std::optional<FrameDims> CropTransform::getOutputFrameDims() const {
|
|
79
|
+
return outputDims_;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Checks that this crop fits inside a frame of size `inputDims`. Every
// violated constraint fails a TORCH_CHECK; nothing is returned on success.
void CropTransform::validate(const FrameDims& inputDims) const {
  // The cropped region can never be larger than the frame it is cut from.
  TORCH_CHECK(
      outputDims_.height <= inputDims.height,
      "Crop output height (",
      outputDims_.height,
      ") is greater than input height (",
      inputDims.height,
      ")");
  TORCH_CHECK(
      outputDims_.width <= inputDims.width,
      "Crop output width (",
      outputDims_.width,
      ") is greater than input width (",
      inputDims.width,
      ")");

  // A position is either fully specified or not specified at all.
  TORCH_CHECK(
      x_.has_value() == y_.has_value(),
      "Crop x and y values must be both set or both unset");

  // Without an explicit position there is nothing left to bounds-check.
  if (!x_.has_value()) {
    return;
  }

  // Horizontal extent: the start, then start + width, must stay in the frame.
  TORCH_CHECK(
      x_.value() <= inputDims.width,
      "Crop x start position, ",
      x_.value(),
      ", out of bounds of input width, ",
      inputDims.width);
  TORCH_CHECK(
      x_.value() + outputDims_.width <= inputDims.width,
      "Crop x end position, ",
      x_.value() + outputDims_.width,
      ", out of bounds of input width ",
      inputDims.width);

  // Vertical extent: the start, then start + height, must stay in the frame.
  TORCH_CHECK(
      y_.value() <= inputDims.height,
      "Crop y start position, ",
      y_.value(),
      ", out of bounds of input height, ",
      inputDims.height);
  TORCH_CHECK(
      y_.value() + outputDims_.height <= inputDims.height,
      "Crop y end position, ",
      y_.value() + outputDims_.height,
      ", out of bounds of input height ",
      inputDims.height);
}
|
|
127
|
+
|
|
128
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include <optional>
|
|
10
|
+
#include <string>
|
|
11
|
+
#include "Frame.h"
|
|
12
|
+
#include "Metadata.h"
|
|
13
|
+
|
|
14
|
+
namespace facebook::torchcodec {
|
|
15
|
+
|
|
16
|
+
class Transform {
|
|
17
|
+
public:
|
|
18
|
+
virtual std::string getFilterGraphCpu() const = 0;
|
|
19
|
+
virtual ~Transform() = default;
|
|
20
|
+
|
|
21
|
+
// If the transformation does not change the output frame dimensions, then
|
|
22
|
+
// there is no need to override this member function. The default
|
|
23
|
+
// implementation returns an empty optional, indicating that the output frame
|
|
24
|
+
// has the same dimensions as the input frame.
|
|
25
|
+
//
|
|
26
|
+
// If the transformation does change the output frame dimensions, then it
|
|
27
|
+
// must override this member function and return the output frame dimensions.
|
|
28
|
+
virtual std::optional<FrameDims> getOutputFrameDims() const {
|
|
29
|
+
return std::nullopt;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// The ResizeTransform is special because it is the only transform
|
|
33
|
+
// that swscale can handle.
|
|
34
|
+
virtual bool isResize() const {
|
|
35
|
+
return false;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// The validity of some transforms depends on the characteristics of the
|
|
39
|
+
// AVStream they're being applied to. For example, some transforms will
|
|
40
|
+
// specify coordinates inside a frame, we need to validate that those are
|
|
41
|
+
// within the frame's bounds.
|
|
42
|
+
//
|
|
43
|
+
// Note that the validation function does not return anything. We expect
|
|
44
|
+
// invalid configurations to throw an exception.
|
|
45
|
+
virtual void validate([[maybe_unused]] const FrameDims& inputDims) const {}
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
class ResizeTransform : public Transform {
|
|
49
|
+
public:
|
|
50
|
+
enum class InterpolationMode { BILINEAR };
|
|
51
|
+
|
|
52
|
+
explicit ResizeTransform(const FrameDims& dims)
|
|
53
|
+
: outputDims_(dims), interpolationMode_(InterpolationMode::BILINEAR) {}
|
|
54
|
+
|
|
55
|
+
ResizeTransform(const FrameDims& dims, InterpolationMode interpolationMode)
|
|
56
|
+
: outputDims_(dims), interpolationMode_(interpolationMode) {}
|
|
57
|
+
|
|
58
|
+
std::string getFilterGraphCpu() const override;
|
|
59
|
+
std::optional<FrameDims> getOutputFrameDims() const override;
|
|
60
|
+
bool isResize() const override;
|
|
61
|
+
|
|
62
|
+
int getSwsFlags() const;
|
|
63
|
+
|
|
64
|
+
private:
|
|
65
|
+
FrameDims outputDims_;
|
|
66
|
+
InterpolationMode interpolationMode_;
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
class CropTransform : public Transform {
|
|
70
|
+
public:
|
|
71
|
+
CropTransform(const FrameDims& dims, int x, int y);
|
|
72
|
+
|
|
73
|
+
// Becomes a center crop if x and y are not specified.
|
|
74
|
+
explicit CropTransform(const FrameDims& dims);
|
|
75
|
+
|
|
76
|
+
std::string getFilterGraphCpu() const override;
|
|
77
|
+
std::optional<FrameDims> getOutputFrameDims() const override;
|
|
78
|
+
void validate(const FrameDims& inputDims) const override;
|
|
79
|
+
|
|
80
|
+
private:
|
|
81
|
+
FrameDims outputDims_;
|
|
82
|
+
std::optional<int> x_;
|
|
83
|
+
std::optional<int> y_;
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "ValidationUtils.h"
|
|
8
|
+
#include <limits>
|
|
9
|
+
#include "c10/util/Exception.h"
|
|
10
|
+
|
|
11
|
+
namespace facebook::torchcodec {
|
|
12
|
+
|
|
13
|
+
int validateInt64ToInt(int64_t value, const std::string& parameterName) {
|
|
14
|
+
TORCH_CHECK(
|
|
15
|
+
value >= std::numeric_limits<int>::min() &&
|
|
16
|
+
value <= std::numeric_limits<int>::max(),
|
|
17
|
+
parameterName,
|
|
18
|
+
"=",
|
|
19
|
+
value,
|
|
20
|
+
" is out of range for int type.");
|
|
21
|
+
|
|
22
|
+
return static_cast<int>(value);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
std::optional<int> validateOptionalInt64ToInt(
|
|
26
|
+
const std::optional<int64_t>& value,
|
|
27
|
+
const std::string& parameterName) {
|
|
28
|
+
if (value.has_value()) {
|
|
29
|
+
return validateInt64ToInt(value.value(), parameterName);
|
|
30
|
+
} else {
|
|
31
|
+
return std::nullopt;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include <cstdint>
|
|
10
|
+
#include <optional>
|
|
11
|
+
#include <string>
|
|
12
|
+
|
|
13
|
+
namespace facebook::torchcodec {
|
|
14
|
+
|
|
15
|
+
// Returns `value` converted to int. If `value` lies outside the range of int,
// a TORCH_CHECK fails with a message that names `parameterName` and the value.
int validateInt64ToInt(int64_t value, const std::string& parameterName);
|
|
16
|
+
|
|
17
|
+
// Optional-aware variant of validateInt64ToInt: returns std::nullopt when
// `value` is empty; otherwise the contained value is range-checked and
// converted to int.
std::optional<int> validateOptionalInt64ToInt(
    const std::optional<int64_t>& value, const std::string& parameterName);
|
|
20
|
+
|
|
21
|
+
} // namespace facebook::torchcodec
|