torchcodec 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +29 -0
- data/README.md +69 -0
- data/ext/torchcodec/AVIOContextHolder.cpp +60 -0
- data/ext/torchcodec/AVIOContextHolder.h +64 -0
- data/ext/torchcodec/AVIOTensorContext.cpp +130 -0
- data/ext/torchcodec/AVIOTensorContext.h +44 -0
- data/ext/torchcodec/CpuDeviceInterface.cpp +509 -0
- data/ext/torchcodec/CpuDeviceInterface.h +141 -0
- data/ext/torchcodec/DeviceInterface.cpp +117 -0
- data/ext/torchcodec/DeviceInterface.h +191 -0
- data/ext/torchcodec/Encoder.cpp +1054 -0
- data/ext/torchcodec/Encoder.h +192 -0
- data/ext/torchcodec/FFMPEGCommon.cpp +684 -0
- data/ext/torchcodec/FFMPEGCommon.h +314 -0
- data/ext/torchcodec/FilterGraph.cpp +159 -0
- data/ext/torchcodec/FilterGraph.h +59 -0
- data/ext/torchcodec/Frame.cpp +47 -0
- data/ext/torchcodec/Frame.h +72 -0
- data/ext/torchcodec/Metadata.cpp +124 -0
- data/ext/torchcodec/Metadata.h +92 -0
- data/ext/torchcodec/SingleStreamDecoder.cpp +1586 -0
- data/ext/torchcodec/SingleStreamDecoder.h +391 -0
- data/ext/torchcodec/StableABICompat.h +185 -0
- data/ext/torchcodec/StreamOptions.h +70 -0
- data/ext/torchcodec/Transform.cpp +128 -0
- data/ext/torchcodec/Transform.h +86 -0
- data/ext/torchcodec/ValidationUtils.cpp +35 -0
- data/ext/torchcodec/ValidationUtils.h +21 -0
- data/ext/torchcodec/custom_ops.cpp +913 -0
- data/ext/torchcodec/ext.cpp +12 -0
- data/ext/torchcodec/extconf.rb +73 -0
- data/lib/torchcodec/core/metadata.rb +41 -0
- data/lib/torchcodec/decoders/audio_decoder.rb +88 -0
- data/lib/torchcodec/decoders/decoder_utils.rb +11 -0
- data/lib/torchcodec/version.rb +3 -0
- data/lib/torchcodec.rb +28 -0
- metadata +90 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "Metadata.h"
|
|
8
|
+
#include "torch/types.h"
|
|
9
|
+
|
|
10
|
+
namespace facebook::torchcodec {
|
|
11
|
+
|
|
12
|
+
// Computes the duration of the stream, in seconds, for the given seek mode.
//
// exact / custom_frame_mappings: the difference between the end and begin
// presentation timestamps (in seconds) obtained from content. Both must be
// set, otherwise the TORCH_CHECK fails.
//
// approximate: the first available of
//   1. durationSecondsFromHeader,
//   2. numFramesFromHeader / averageFpsFromHeader (only if the fps is not 0),
//   3. durationSecondsFromContainer,
// and std::nullopt when none of them is available.
std::optional<double> StreamMetadata::getDurationSeconds(
    SeekMode seekMode) const {
  if (seekMode == SeekMode::exact ||
      seekMode == SeekMode::custom_frame_mappings) {
    TORCH_CHECK(
        endStreamPtsSecondsFromContent.has_value() &&
            beginStreamPtsSecondsFromContent.has_value(),
        "Missing beginStreamPtsSecondsFromContent or endStreamPtsSecondsFromContent");
    const double endSeconds = *endStreamPtsSecondsFromContent;
    const double beginSeconds = *beginStreamPtsSecondsFromContent;
    return endSeconds - beginSeconds;
  }

  if (seekMode != SeekMode::approximate) {
    TORCH_CHECK(false, "Unknown SeekMode");
  }

  // Approximate mode: walk the fallback chain from most to least specific.
  if (durationSecondsFromHeader.has_value()) {
    return durationSecondsFromHeader;
  }

  const bool canDeriveFromFrameCount = numFramesFromHeader.has_value() &&
      averageFpsFromHeader.has_value() && *averageFpsFromHeader != 0.0;
  if (canDeriveFromFrameCount) {
    const double frameCount = static_cast<double>(*numFramesFromHeader);
    return frameCount / *averageFpsFromHeader;
  }

  // Either holds the container-level duration or is already std::nullopt.
  return durationSecondsFromContainer;
}
|
|
40
|
+
|
|
41
|
+
// Returns the timestamp, in seconds, at which the stream begins.
//
// exact / custom_frame_mappings: beginStreamPtsSecondsFromContent must be set,
// otherwise the TORCH_CHECK fails.
//
// approximate: beginStreamPtsSecondsFromContent when it is set, 0.0 otherwise.
double StreamMetadata::getBeginStreamSeconds(SeekMode seekMode) const {
  if (seekMode == SeekMode::approximate) {
    return beginStreamPtsSecondsFromContent.value_or(0.0);
  }

  if (seekMode != SeekMode::exact &&
      seekMode != SeekMode::custom_frame_mappings) {
    TORCH_CHECK(false, "Unknown SeekMode");
  }

  TORCH_CHECK(
      beginStreamPtsSecondsFromContent.has_value(),
      "Missing beginStreamPtsSecondsFromContent");
  return *beginStreamPtsSecondsFromContent;
}
|
|
58
|
+
|
|
59
|
+
// Returns the timestamp, in seconds, at which the stream ends.
//
// exact / custom_frame_mappings: endStreamPtsSecondsFromContent must be set,
// otherwise the TORCH_CHECK fails.
//
// approximate: endStreamPtsSecondsFromContent when it is set; otherwise the
// result of getDurationSeconds() for approximate mode, which may itself be
// std::nullopt.
std::optional<double> StreamMetadata::getEndStreamSeconds(
    SeekMode seekMode) const {
  if (seekMode == SeekMode::approximate) {
    return endStreamPtsSecondsFromContent.has_value()
        ? endStreamPtsSecondsFromContent
        : getDurationSeconds(seekMode);
  }

  if (seekMode != SeekMode::exact &&
      seekMode != SeekMode::custom_frame_mappings) {
    TORCH_CHECK(false, "Unknown SeekMode");
  }

  TORCH_CHECK(
      endStreamPtsSecondsFromContent.has_value(),
      "Missing endStreamPtsSecondsFromContent");
  return *endStreamPtsSecondsFromContent;
}
|
|
77
|
+
|
|
78
|
+
// Returns the number of frames in the stream for the given seek mode.
//
// exact / custom_frame_mappings: numFramesFromContent must be set, otherwise
// the TORCH_CHECK fails.
//
// approximate: numFramesFromHeader when it is set; otherwise an estimate
// computed as averageFpsFromHeader * getDurationSeconds(), truncated toward
// zero. Returns std::nullopt when neither is available, or when the estimate
// cannot be represented as an int64_t (NaN, infinite or out of range).
std::optional<int64_t> StreamMetadata::getNumFrames(SeekMode seekMode) const {
  switch (seekMode) {
    case SeekMode::custom_frame_mappings:
    case SeekMode::exact:
      TORCH_CHECK(
          numFramesFromContent.has_value(), "Missing numFramesFromContent");
      return numFramesFromContent.value();
    case SeekMode::approximate: {
      if (numFramesFromHeader.has_value()) {
        return numFramesFromHeader.value();
      }
      if (!averageFpsFromHeader.has_value()) {
        return std::nullopt;
      }
      // Only compute the duration once we know it is actually needed.
      const std::optional<double> durationSeconds =
          getDurationSeconds(seekMode);
      if (!durationSeconds.has_value()) {
        return std::nullopt;
      }
      const double estimatedFrames =
          averageFpsFromHeader.value() * durationSeconds.value();
      // Converting a floating-point value to an integer type that cannot
      // represent it (NaN, +/-inf, or magnitude >= 2^63) is undefined
      // behavior, so treat such an estimate as "unknown". The comparisons are
      // written so that NaN fails them.
      constexpr double kInt64Bound = 9223372036854775808.0; // 2^63
      const bool representable =
          estimatedFrames >= -kInt64Bound && estimatedFrames < kInt64Bound;
      if (!representable) {
        return std::nullopt;
      }
      return static_cast<int64_t>(estimatedFrames);
    }
    default:
      TORCH_CHECK(false, "Unknown SeekMode");
  }
}
|
|
100
|
+
|
|
101
|
+
// Returns the average number of frames per second for the given seek mode.
//
// approximate: averageFpsFromHeader, which may be std::nullopt.
//
// exact / custom_frame_mappings: getNumFrames() divided by the content-based
// duration (end minus begin timestamps, in seconds) when both timestamps are
// set and the duration is not 0; otherwise falls back to
// averageFpsFromHeader. Note that getNumFrames() itself fails its TORCH_CHECK
// in these modes when numFramesFromContent is missing.
std::optional<double> StreamMetadata::getAverageFps(SeekMode seekMode) const {
  if (seekMode == SeekMode::approximate) {
    return averageFpsFromHeader;
  }

  if (seekMode != SeekMode::exact &&
      seekMode != SeekMode::custom_frame_mappings) {
    TORCH_CHECK(false, "Unknown SeekMode");
  }

  const std::optional<int64_t> frameCount = getNumFrames(seekMode);
  const bool haveContentRange = frameCount.has_value() &&
      beginStreamPtsSecondsFromContent.has_value() &&
      endStreamPtsSecondsFromContent.has_value();
  if (haveContentRange) {
    const double contentDuration =
        *endStreamPtsSecondsFromContent - *beginStreamPtsSecondsFromContent;
    // Guard against dividing by zero for a zero-length stream.
    if (contentDuration != 0.0) {
      return static_cast<double>(*frameCount) / contentDuration;
    }
  }
  return averageFpsFromHeader;
}
|
|
123
|
+
|
|
124
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include <optional>
|
|
10
|
+
#include <string>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
extern "C" {
|
|
14
|
+
#include <libavcodec/avcodec.h>
|
|
15
|
+
#include <libavutil/avutil.h>
|
|
16
|
+
#include <libavutil/rational.h>
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
namespace facebook::torchcodec {
|
|
20
|
+
|
|
21
|
+
// Selects which metadata source the StreamMetadata accessors rely on.
enum class SeekMode {
  // Accessors require the values obtained from content (e.g.
  // numFramesFromContent, begin/endStreamPtsSecondsFromContent).
  exact,
  // Accessors fall back to header/container values when content-derived
  // values are not available.
  approximate,
  // Handled the same way as `exact` by the StreamMetadata accessors.
  custom_frame_mappings
};
|
|
22
|
+
|
|
23
|
+
struct StreamMetadata {
|
|
24
|
+
// Common (video and audio) fields derived from the AVStream.
|
|
25
|
+
int streamIndex = -1;
|
|
26
|
+
|
|
27
|
+
// See this link for what various values are available:
|
|
28
|
+
// https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48
|
|
29
|
+
AVMediaType mediaType = AVMEDIA_TYPE_UNKNOWN;
|
|
30
|
+
|
|
31
|
+
std::optional<AVCodecID> codecId;
|
|
32
|
+
std::optional<std::string> codecName;
|
|
33
|
+
std::optional<double> durationSecondsFromHeader;
|
|
34
|
+
std::optional<double> beginStreamSecondsFromHeader;
|
|
35
|
+
std::optional<int64_t> numFramesFromHeader;
|
|
36
|
+
std::optional<int64_t> numKeyFrames;
|
|
37
|
+
std::optional<double> averageFpsFromHeader;
|
|
38
|
+
std::optional<double> bitRate;
|
|
39
|
+
|
|
40
|
+
// Used as fallback in approximate mode when stream duration is unavailable.
|
|
41
|
+
std::optional<double> durationSecondsFromContainer;
|
|
42
|
+
|
|
43
|
+
// More accurate duration, obtained by scanning the file.
|
|
44
|
+
// These presentation timestamps are in time base.
|
|
45
|
+
std::optional<int64_t> beginStreamPtsFromContent;
|
|
46
|
+
std::optional<int64_t> endStreamPtsFromContent;
|
|
47
|
+
|
|
48
|
+
// These presentation timestamps are in seconds.
|
|
49
|
+
std::optional<double> beginStreamPtsSecondsFromContent;
|
|
50
|
+
std::optional<double> endStreamPtsSecondsFromContent;
|
|
51
|
+
|
|
52
|
+
// This can be useful for index-based seeking.
|
|
53
|
+
std::optional<int64_t> numFramesFromContent;
|
|
54
|
+
|
|
55
|
+
// Video-only fields
|
|
56
|
+
std::optional<int> width;
|
|
57
|
+
std::optional<int> height;
|
|
58
|
+
std::optional<AVRational> sampleAspectRatio;
|
|
59
|
+
|
|
60
|
+
// Audio-only fields
|
|
61
|
+
std::optional<int64_t> sampleRate;
|
|
62
|
+
std::optional<int64_t> numChannels;
|
|
63
|
+
std::optional<std::string> sampleFormat;
|
|
64
|
+
|
|
65
|
+
// Computed methods with fallback logic
|
|
66
|
+
std::optional<double> getDurationSeconds(SeekMode seekMode) const;
|
|
67
|
+
double getBeginStreamSeconds(SeekMode seekMode) const;
|
|
68
|
+
std::optional<double> getEndStreamSeconds(SeekMode seekMode) const;
|
|
69
|
+
std::optional<int64_t> getNumFrames(SeekMode seekMode) const;
|
|
70
|
+
std::optional<double> getAverageFps(SeekMode seekMode) const;
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
struct ContainerMetadata {
|
|
74
|
+
std::vector<StreamMetadata> allStreamMetadata;
|
|
75
|
+
int numAudioStreams = 0;
|
|
76
|
+
int numVideoStreams = 0;
|
|
77
|
+
|
|
78
|
+
// Note that this is the container-level duration, which is usually the max
|
|
79
|
+
// of all stream durations available in the container.
|
|
80
|
+
std::optional<double> durationSecondsFromHeader;
|
|
81
|
+
|
|
82
|
+
// Total BitRate level information at the container level in bit/s
|
|
83
|
+
std::optional<double> bitRate;
|
|
84
|
+
|
|
85
|
+
// If set, this is the index to the default audio stream.
|
|
86
|
+
std::optional<int> bestAudioStreamIndex;
|
|
87
|
+
|
|
88
|
+
// If set, this is the index to the default video stream.
|
|
89
|
+
std::optional<int> bestVideoStreamIndex;
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
} // namespace facebook::torchcodec
|