react-native-sherpa-onnx 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -77
- package/SherpaOnnx.podspec +79 -45
- package/android/build.gradle +8 -2
- package/android/prebuilt-download.gradle +70 -16
- package/android/prebuilt-versions.gradle +14 -6
- package/android/src/main/cpp/CMakeLists.txt +2 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +202 -328
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +22 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +2 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +96 -142
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +40 -4
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +774 -316
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +208 -122
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +92 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +14 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +229 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +144 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +1 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +157 -11
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +75 -24
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +52 -1
- package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
- package/ios/SherpaOnnx+STT.mm +2 -0
- package/ios/SherpaOnnx+TTS.mm +17 -0
- package/ios/SherpaOnnx.mm +27 -3
- package/ios/SherpaOnnxAudioConvert.h +28 -0
- package/ios/SherpaOnnxAudioConvert.mm +698 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +12 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +37 -3
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +80 -45
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +629 -267
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +148 -56
- package/ios/model_detect/sherpa-onnx-model-detect.h +72 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.mm +229 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +144 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +55 -1
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +14 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/index.js +10 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/stt/streaming.js +6 -3
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/tts/index.js +13 -1
- package/lib/module/tts/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +32 -3
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +20 -1
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +10 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +12 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/check-model-csvs.sh +72 -0
- package/scripts/setup-ios-framework.sh +272 -191
- package/src/NativeSherpaOnnx.ts +37 -3
- package/src/audio/index.ts +84 -1
- package/src/download/ModelDownloadManager.ts +19 -0
- package/src/index.tsx +15 -0
- package/src/stt/streaming.ts +10 -5
- package/src/stt/streamingTypes.ts +1 -1
- package/src/tts/index.ts +25 -1
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/ios/scripts/patch-libarchive-includes.sh +0 -61
- package/ios/scripts/setup-ios-libarchive.sh +0 -98
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include <android/log.h>
|
|
9
9
|
#include <jni.h>
|
|
10
10
|
#include <string>
|
|
11
|
+
#include <sys/stat.h>
|
|
11
12
|
|
|
12
13
|
#define LOG_TAG "AudioConvertJNI"
|
|
13
14
|
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
|
|
@@ -19,316 +20,48 @@ extern "C" {
|
|
|
19
20
|
#include <libavcodec/avcodec.h>
|
|
20
21
|
#include <libavformat/avformat.h>
|
|
21
22
|
#include <libavutil/opt.h>
|
|
23
|
+
#include <libavutil/error.h>
|
|
22
24
|
#include <libswresample/swresample.h>
|
|
23
25
|
}
|
|
24
26
|
#include <cstdio>
|
|
25
27
|
#include <vector>
|
|
26
28
|
#endif
|
|
27
29
|
|
|
28
|
-
//
|
|
29
|
-
|
|
30
|
-
static std::string convertToWav16kMono(const char* inputPath, const char* outputPath) {
|
|
31
|
-
#ifdef HAVE_FFMPEG
|
|
32
|
-
// Implement a basic decode -> resample -> write WAV pipeline using libav* APIs.
|
|
33
|
-
av_log_set_level(AV_LOG_ERROR);
|
|
34
|
-
|
|
35
|
-
AVFormatContext* inFmt = nullptr;
|
|
36
|
-
if (avformat_open_input(&inFmt, inputPath, nullptr, nullptr) < 0) {
|
|
37
|
-
return std::string("Failed to open input file");
|
|
38
|
-
}
|
|
39
|
-
if (avformat_find_stream_info(inFmt, nullptr) < 0) {
|
|
40
|
-
avformat_close_input(&inFmt);
|
|
41
|
-
return std::string("Failed to find stream info");
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
int audioStreamIndex = -1;
|
|
45
|
-
for (unsigned i = 0; i < inFmt->nb_streams; ++i) {
|
|
46
|
-
if (inFmt->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
|
|
47
|
-
audioStreamIndex = i;
|
|
48
|
-
break;
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
if (audioStreamIndex < 0) {
|
|
52
|
-
avformat_close_input(&inFmt);
|
|
53
|
-
return std::string("No audio stream found in input");
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
AVStream* inStream = inFmt->streams[audioStreamIndex];
|
|
57
|
-
const AVCodec* decoder = avcodec_find_decoder(inStream->codecpar->codec_id);
|
|
58
|
-
if (!decoder) {
|
|
59
|
-
avformat_close_input(&inFmt);
|
|
60
|
-
return std::string("Unsupported input codec");
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
AVCodecContext* decCtx = avcodec_alloc_context3(decoder);
|
|
64
|
-
if (!decCtx) {
|
|
65
|
-
avformat_close_input(&inFmt);
|
|
66
|
-
return std::string("Failed to allocate decoder context");
|
|
67
|
-
}
|
|
68
|
-
if (avcodec_parameters_to_context(decCtx, inStream->codecpar) < 0) {
|
|
69
|
-
avcodec_free_context(&decCtx);
|
|
70
|
-
avformat_close_input(&inFmt);
|
|
71
|
-
return std::string("Failed to copy codec parameters");
|
|
72
|
-
}
|
|
73
|
-
if (avcodec_open2(decCtx, decoder, nullptr) < 0) {
|
|
74
|
-
avcodec_free_context(&decCtx);
|
|
75
|
-
avformat_close_input(&inFmt);
|
|
76
|
-
return std::string("Failed to open decoder");
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
// Prepare resampler to 16k mono s16 using AVChannelLayout helpers
|
|
80
|
-
SwrContext* swr = nullptr;
|
|
81
|
-
AVChannelLayout out_ch_layout = AV_CHANNEL_LAYOUT_MONO;
|
|
82
|
-
AVChannelLayout in_ch_layout;
|
|
83
|
-
// Prefer codecpar ch_layout when available, otherwise fall back to decoder ctx
|
|
84
|
-
if (inStream->codecpar->ch_layout.nb_channels) {
|
|
85
|
-
if (av_channel_layout_copy(&in_ch_layout, &inStream->codecpar->ch_layout) < 0) {
|
|
86
|
-
avcodec_free_context(&decCtx);
|
|
87
|
-
avformat_close_input(&inFmt);
|
|
88
|
-
return std::string("Failed to copy input channel layout");
|
|
89
|
-
}
|
|
90
|
-
} else {
|
|
91
|
-
if (av_channel_layout_copy(&in_ch_layout, &decCtx->ch_layout) < 0) {
|
|
92
|
-
avcodec_free_context(&decCtx);
|
|
93
|
-
avformat_close_input(&inFmt);
|
|
94
|
-
return std::string("Failed to initialize input channel layout");
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
if (swr_alloc_set_opts2(&swr,
|
|
98
|
-
&out_ch_layout, AV_SAMPLE_FMT_S16, 16000,
|
|
99
|
-
&in_ch_layout, (AVSampleFormat)decCtx->sample_fmt, decCtx->sample_rate,
|
|
100
|
-
0, nullptr) < 0 || !swr) {
|
|
101
|
-
av_channel_layout_uninit(&in_ch_layout);
|
|
102
|
-
if (swr) swr_free(&swr);
|
|
103
|
-
avcodec_free_context(&decCtx);
|
|
104
|
-
avformat_close_input(&inFmt);
|
|
105
|
-
return std::string("Failed to initialize resampler");
|
|
106
|
-
}
|
|
107
|
-
av_channel_layout_uninit(&in_ch_layout);
|
|
108
|
-
|
|
109
|
-
// Prepare output WAV via avformat
|
|
110
|
-
AVFormatContext* outFmt = nullptr;
|
|
111
|
-
if (avformat_alloc_output_context2(&outFmt, nullptr, nullptr, outputPath) < 0 || !outFmt) {
|
|
112
|
-
swr_free(&swr);
|
|
113
|
-
avcodec_free_context(&decCtx);
|
|
114
|
-
avformat_close_input(&inFmt);
|
|
115
|
-
return std::string("Failed to allocate output context");
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
const AVCodec* pcmCodec = avcodec_find_encoder(AV_CODEC_ID_PCM_S16LE);
|
|
119
|
-
if (!pcmCodec) {
|
|
120
|
-
avformat_free_context(outFmt);
|
|
121
|
-
swr_free(&swr);
|
|
122
|
-
avcodec_free_context(&decCtx);
|
|
123
|
-
avformat_close_input(&inFmt);
|
|
124
|
-
return std::string("PCM encoder not found");
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
AVStream* outStream = avformat_new_stream(outFmt, nullptr);
|
|
128
|
-
if (!outStream) {
|
|
129
|
-
avformat_free_context(outFmt);
|
|
130
|
-
swr_free(&swr);
|
|
131
|
-
avcodec_free_context(&decCtx);
|
|
132
|
-
avformat_close_input(&inFmt);
|
|
133
|
-
return std::string("Failed to create output stream");
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
AVCodecContext* encCtx = avcodec_alloc_context3(pcmCodec);
|
|
137
|
-
// Configure encoder context for mono 16k s16 output
|
|
138
|
-
AVChannelLayout mono_layout = AV_CHANNEL_LAYOUT_MONO;
|
|
139
|
-
if (!encCtx) {
|
|
140
|
-
avformat_free_context(outFmt);
|
|
141
|
-
swr_free(&swr);
|
|
142
|
-
avcodec_free_context(&decCtx);
|
|
143
|
-
avformat_close_input(&inFmt);
|
|
144
|
-
return std::string("Failed to allocate encoder context");
|
|
145
|
-
}
|
|
146
|
-
if (av_channel_layout_copy(&encCtx->ch_layout, &mono_layout) < 0) {
|
|
147
|
-
avcodec_free_context(&encCtx);
|
|
148
|
-
avformat_free_context(outFmt);
|
|
149
|
-
swr_free(&swr);
|
|
150
|
-
avcodec_free_context(&decCtx);
|
|
151
|
-
avformat_close_input(&inFmt);
|
|
152
|
-
return std::string("Failed to set encoder channel layout");
|
|
153
|
-
}
|
|
154
|
-
encCtx->sample_rate = 16000;
|
|
155
|
-
encCtx->sample_fmt = AV_SAMPLE_FMT_S16;
|
|
156
|
-
encCtx->bit_rate = 16 * 16000; // rough
|
|
157
|
-
|
|
158
|
-
if (outFmt->oformat->flags & AVFMT_GLOBALHEADER) encCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
|
|
159
|
-
|
|
160
|
-
if (avcodec_open2(encCtx, pcmCodec, nullptr) < 0) {
|
|
161
|
-
avcodec_free_context(&encCtx);
|
|
162
|
-
avformat_free_context(outFmt);
|
|
163
|
-
swr_free(&swr);
|
|
164
|
-
avcodec_free_context(&decCtx);
|
|
165
|
-
avformat_close_input(&inFmt);
|
|
166
|
-
return std::string("Failed to open PCM encoder");
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
if (avcodec_parameters_from_context(outStream->codecpar, encCtx) < 0) {
|
|
170
|
-
avcodec_free_context(&encCtx);
|
|
171
|
-
avformat_free_context(outFmt);
|
|
172
|
-
swr_free(&swr);
|
|
173
|
-
avcodec_free_context(&decCtx);
|
|
174
|
-
avformat_close_input(&inFmt);
|
|
175
|
-
return std::string("Failed to set output stream parameters");
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
if (!(outFmt->oformat->flags & AVFMT_NOFILE)) {
|
|
179
|
-
if (avio_open(&outFmt->pb, outputPath, AVIO_FLAG_WRITE) < 0) {
|
|
180
|
-
avcodec_free_context(&encCtx);
|
|
181
|
-
avformat_free_context(outFmt);
|
|
182
|
-
swr_free(&swr);
|
|
183
|
-
avcodec_free_context(&decCtx);
|
|
184
|
-
avformat_close_input(&inFmt);
|
|
185
|
-
return std::string("Failed to open output file for writing");
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
if (avformat_write_header(outFmt, nullptr) < 0) {
|
|
190
|
-
if (!(outFmt->oformat->flags & AVFMT_NOFILE)) avio_closep(&outFmt->pb);
|
|
191
|
-
avcodec_free_context(&encCtx);
|
|
192
|
-
avformat_free_context(outFmt);
|
|
193
|
-
swr_free(&swr);
|
|
194
|
-
avcodec_free_context(&decCtx);
|
|
195
|
-
avformat_close_input(&inFmt);
|
|
196
|
-
return std::string("Failed to write output header");
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
AVPacket* pkt = av_packet_alloc();
|
|
200
|
-
AVFrame* frame = av_frame_alloc();
|
|
201
|
-
AVFrame* resampled = av_frame_alloc();
|
|
202
|
-
// Configure resampled frame metadata
|
|
203
|
-
resampled->format = AV_SAMPLE_FMT_S16;
|
|
204
|
-
resampled->sample_rate = 16000;
|
|
205
|
-
// set channel layout on frame
|
|
206
|
-
AVChannelLayout out_ch_layout_local = AV_CHANNEL_LAYOUT_MONO;
|
|
207
|
-
if (av_channel_layout_copy(&resampled->ch_layout, &out_ch_layout_local) < 0) {
|
|
208
|
-
av_frame_free(&frame);
|
|
209
|
-
av_frame_free(&resampled);
|
|
210
|
-
swr_free(&swr);
|
|
211
|
-
avcodec_free_context(&decCtx);
|
|
212
|
-
avformat_close_input(&inFmt);
|
|
213
|
-
return std::string("Failed to set resampled frame channel layout");
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
// Buffer for resampled data will be allocated per needed samples
|
|
217
|
-
|
|
218
|
-
while (av_read_frame(inFmt, pkt) >= 0) {
|
|
219
|
-
if (pkt->stream_index == audioStreamIndex) {
|
|
220
|
-
if (avcodec_send_packet(decCtx, pkt) == 0) {
|
|
221
|
-
while (avcodec_receive_frame(decCtx, frame) == 0) {
|
|
222
|
-
// Resample
|
|
223
|
-
int in_sr = inStream->codecpar->sample_rate ? inStream->codecpar->sample_rate : decCtx->sample_rate;
|
|
224
|
-
int64_t out_nb_samples = av_rescale_rnd(swr_get_delay(swr, in_sr) + frame->nb_samples, 16000, in_sr, AV_ROUND_UP);
|
|
225
|
-
uint8_t** outData = nullptr;
|
|
226
|
-
int out_channels = resampled->ch_layout.nb_channels;
|
|
227
|
-
if (out_channels <= 0) out_channels = 1;
|
|
228
|
-
if (av_samples_alloc_array_and_samples(&outData, nullptr, out_channels, (int)out_nb_samples, AV_SAMPLE_FMT_S16, 0) < 0) {
|
|
229
|
-
av_packet_unref(pkt);
|
|
230
|
-
continue;
|
|
231
|
-
}
|
|
232
|
-
int converted = swr_convert(swr, outData, (int)out_nb_samples, (const uint8_t**)frame->data, frame->nb_samples);
|
|
233
|
-
if (converted < 0) {
|
|
234
|
-
av_freep(&outData[0]);
|
|
235
|
-
av_freep(&outData);
|
|
236
|
-
continue;
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
// prepare frame for encoder
|
|
240
|
-
resampled->nb_samples = converted;
|
|
241
|
-
if (av_frame_get_buffer(resampled, 0) < 0) {
|
|
242
|
-
av_freep(&outData[0]);
|
|
243
|
-
av_freep(&outData);
|
|
244
|
-
continue;
|
|
245
|
-
}
|
|
246
|
-
// copy data into resampled frame
|
|
247
|
-
int bytes_per_sample = av_get_bytes_per_sample((AVSampleFormat)resampled->format);
|
|
248
|
-
int copy_size = converted * bytes_per_sample * out_channels;
|
|
249
|
-
memcpy(resampled->data[0], outData[0], copy_size);
|
|
250
|
-
|
|
251
|
-
// send to encoder
|
|
252
|
-
if (avcodec_send_frame(encCtx, resampled) == 0) {
|
|
253
|
-
AVPacket* outPkt = av_packet_alloc();
|
|
254
|
-
while (avcodec_receive_packet(encCtx, outPkt) == 0) {
|
|
255
|
-
outPkt->stream_index = outStream->index;
|
|
256
|
-
av_packet_rescale_ts(outPkt, encCtx->time_base, outStream->time_base);
|
|
257
|
-
av_interleaved_write_frame(outFmt, outPkt);
|
|
258
|
-
av_packet_unref(outPkt);
|
|
259
|
-
}
|
|
260
|
-
av_packet_free(&outPkt);
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
av_freep(&outData[0]);
|
|
264
|
-
av_freep(&outData);
|
|
265
|
-
av_frame_unref(resampled);
|
|
266
|
-
av_frame_unref(frame);
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
av_packet_unref(pkt);
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
// Flush encoder
|
|
274
|
-
avcodec_send_frame(encCtx, nullptr);
|
|
275
|
-
AVPacket* outPkt = av_packet_alloc();
|
|
276
|
-
while (avcodec_receive_packet(encCtx, outPkt) == 0) {
|
|
277
|
-
outPkt->stream_index = outStream->index;
|
|
278
|
-
av_packet_rescale_ts(outPkt, encCtx->time_base, outStream->time_base);
|
|
279
|
-
av_interleaved_write_frame(outFmt, outPkt);
|
|
280
|
-
av_packet_unref(outPkt);
|
|
281
|
-
}
|
|
282
|
-
av_packet_free(&outPkt);
|
|
283
|
-
|
|
284
|
-
av_write_trailer(outFmt);
|
|
285
|
-
if (!(outFmt->oformat->flags & AVFMT_NOFILE)) avio_closep(&outFmt->pb);
|
|
30
|
+
// Forward declaration — convertToFormat handles all formats including WAV (16 kHz mono).
|
|
31
|
+
static std::string convertToFormat(const char* inputPath, const char* outputPath, const char* formatHint, int outputSampleRateHz);
|
|
286
32
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
av_frame_free(&resampled);
|
|
291
|
-
|
|
292
|
-
swr_free(&swr);
|
|
293
|
-
avcodec_free_context(&encCtx);
|
|
294
|
-
avformat_free_context(outFmt);
|
|
295
|
-
avcodec_free_context(&decCtx);
|
|
296
|
-
avformat_close_input(&inFmt);
|
|
297
|
-
|
|
298
|
-
return std::string("");
|
|
299
|
-
#else
|
|
300
|
-
(void)inputPath;
|
|
301
|
-
(void)outputPath;
|
|
302
|
-
return "FFmpeg not available. Build prebuilts with third_party/ffmpeg_prebuilt/build_ffmpeg.ps1 or build_ffmpeg.sh.";
|
|
303
|
-
#endif
|
|
33
|
+
// Convenience: convert any audio to 16 kHz mono WAV via the main convertToFormat pipeline.
|
|
34
|
+
static std::string convertToWav16kMono(const char* inputPath, const char* outputPath) {
|
|
35
|
+
return convertToFormat(inputPath, outputPath, "wav", 16000);
|
|
304
36
|
}
|
|
305
37
|
|
|
306
38
|
// Generic conversion: supports writing WAV/MP3/FLAC depending on output file extension and linked encoders.
|
|
307
|
-
// WAV
|
|
39
|
+
// WAV output is 16 kHz mono PCM (sherpa-onnx). outputSampleRateHz is used only for MP3 (libshine: 32000/44100/48000; any other value falls back to 44100) and Opus (8000/12000/16000/24000/48000; any other value falls back to 48000).
|
|
308
40
|
static std::string convertToFormat(const char* inputPath, const char* outputPath, const char* formatHint, int outputSampleRateHz) {
|
|
309
41
|
#ifdef HAVE_FFMPEG
|
|
310
|
-
// WAV output is always 16 kHz mono via convertToWav16kMono (sherpa-onnx). Input WAV at 16k is resampled 16k->16k (no change).
|
|
311
42
|
std::string fmt(formatHint ? formatHint : "");
|
|
312
|
-
|
|
313
|
-
return convertToWav16kMono(inputPath, outputPath);
|
|
314
|
-
}
|
|
43
|
+
bool isWav = (fmt == "wav" || fmt == "wav16k" || fmt.empty());
|
|
315
44
|
|
|
316
|
-
// Try to determine codec id from format hint
|
|
317
45
|
AVCodecID codec_id = AV_CODEC_ID_NONE;
|
|
318
|
-
if (
|
|
46
|
+
if (isWav) codec_id = AV_CODEC_ID_PCM_S16LE;
|
|
47
|
+
else if (fmt == "mp3") codec_id = AV_CODEC_ID_MP3;
|
|
319
48
|
else if (fmt == "flac") codec_id = AV_CODEC_ID_FLAC;
|
|
320
|
-
else
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
}
|
|
49
|
+
else if (fmt == "m4a" || fmt == "aac") codec_id = AV_CODEC_ID_AAC;
|
|
50
|
+
else if (fmt == "opus" || fmt == "oggm" || fmt == "ogg" || fmt == "webm" || fmt == "mkv") codec_id = AV_CODEC_ID_OPUS;
|
|
51
|
+
else codec_id = AV_CODEC_ID_PCM_S16LE;
|
|
324
52
|
|
|
325
53
|
// The implementation for generic encoding uses the same decode+resample pipeline
|
|
326
54
|
// but selects encoder by codec_id and creates an output container based on file extension.
|
|
327
55
|
// For brevity we reuse much of the WAV path but change encoder selection.
|
|
328
56
|
|
|
57
|
+
struct stat stIn = {};
|
|
58
|
+
long inputSizeBytes = (stat(inputPath, &stIn) == 0 && S_ISREG(stIn.st_mode)) ? (long)stIn.st_size : -1;
|
|
59
|
+
LOGI("convertToFormat: inputPath=%s inputSizeBytes=%ld format=%s outputPath=%s", inputPath ? inputPath : "(null)", inputSizeBytes, formatHint ? formatHint : "", outputPath ? outputPath : "(null)");
|
|
60
|
+
|
|
329
61
|
// Open input
|
|
330
62
|
AVFormatContext* inFmt = nullptr;
|
|
331
63
|
if (avformat_open_input(&inFmt, inputPath, nullptr, nullptr) < 0) {
|
|
64
|
+
LOGE("Failed to open input file (generic): inputPath=%s", inputPath ? inputPath : "(null)");
|
|
332
65
|
return std::string("Failed to open input file");
|
|
333
66
|
}
|
|
334
67
|
if (avformat_find_stream_info(inFmt, nullptr) < 0) {
|
|
@@ -394,6 +127,15 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
394
127
|
avformat_close_input(&inFmt);
|
|
395
128
|
return std::string("libshine encoder not available in this build");
|
|
396
129
|
}
|
|
130
|
+
} else if (codec_id == AV_CODEC_ID_OPUS) {
|
|
131
|
+
encoder = avcodec_find_encoder_by_name("libopus");
|
|
132
|
+
if (!encoder) {
|
|
133
|
+
avformat_free_context(outFmt);
|
|
134
|
+
swr_free(&swr);
|
|
135
|
+
avcodec_free_context(&decCtx);
|
|
136
|
+
avformat_close_input(&inFmt);
|
|
137
|
+
return std::string("libopus encoder not available in this build");
|
|
138
|
+
}
|
|
397
139
|
} else {
|
|
398
140
|
encoder = avcodec_find_encoder(codec_id);
|
|
399
141
|
if (!encoder) {
|
|
@@ -461,6 +203,15 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
461
203
|
// Set sample rate from input/decoder if not already set
|
|
462
204
|
encCtx->sample_rate = inStream->codecpar->sample_rate ? inStream->codecpar->sample_rate : decCtx->sample_rate;
|
|
463
205
|
|
|
206
|
+
// WAV output: force 16 kHz mono S16 (sherpa-onnx STT requirement)
|
|
207
|
+
if (isWav) {
|
|
208
|
+
encCtx->sample_rate = 16000;
|
|
209
|
+
encCtx->sample_fmt = AV_SAMPLE_FMT_S16;
|
|
210
|
+
av_channel_layout_uninit(&encCtx->ch_layout);
|
|
211
|
+
AVChannelLayout mono = AV_CHANNEL_LAYOUT_MONO;
|
|
212
|
+
av_channel_layout_copy(&encCtx->ch_layout, &mono);
|
|
213
|
+
}
|
|
214
|
+
|
|
464
215
|
// Probe encoder-supported configurations (sample formats, sample rates, channel layouts)
|
|
465
216
|
AVSampleFormat chosen_fmt = AV_SAMPLE_FMT_NONE;
|
|
466
217
|
const void *fmt_configs = nullptr;
|
|
@@ -475,13 +226,8 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
475
226
|
int chl_num = 0;
|
|
476
227
|
avcodec_get_supported_config(encCtx, encoder, AV_CODEC_CONFIG_CHANNEL_LAYOUT, 0, &chl_configs, &chl_num);
|
|
477
228
|
|
|
478
|
-
// Log supported sample formats
|
|
479
229
|
if (fmt_configs && fmt_num > 0) {
|
|
480
230
|
const AVSampleFormat *fmts = (const AVSampleFormat *)fmt_configs;
|
|
481
|
-
for (int i = 0; i < fmt_num; ++i) {
|
|
482
|
-
const char *name = av_get_sample_fmt_name(fmts[i]);
|
|
483
|
-
LOGI("encoder supported fmt[%d]=%s", i, name ? name : "?");
|
|
484
|
-
}
|
|
485
231
|
// prefer interleaved S16, then planar S16P, then decoder fmt, then first
|
|
486
232
|
for (int i = 0; i < fmt_num; ++i) if (fmts[i] == AV_SAMPLE_FMT_S16) { chosen_fmt = AV_SAMPLE_FMT_S16; break; }
|
|
487
233
|
if (chosen_fmt == AV_SAMPLE_FMT_NONE && codec_id == AV_CODEC_ID_MP3) {
|
|
@@ -492,7 +238,8 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
492
238
|
}
|
|
493
239
|
if (chosen_fmt == AV_SAMPLE_FMT_NONE && fmt_num > 0) chosen_fmt = fmts[0];
|
|
494
240
|
} else {
|
|
495
|
-
// libshine only supports S16P; default to S16P for MP3 so open succeeds
|
|
241
|
+
// libshine only supports S16P; default to S16P for MP3 so open succeeds.
|
|
242
|
+
// If AAC, it might prefer FLTP, which `chosen_fmt = fmts[0]` captures above if available.
|
|
496
243
|
chosen_fmt = (codec_id == AV_CODEC_ID_MP3) ? AV_SAMPLE_FMT_S16P : AV_SAMPLE_FMT_S16;
|
|
497
244
|
}
|
|
498
245
|
encCtx->sample_fmt = chosen_fmt;
|
|
@@ -502,7 +249,6 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
502
249
|
const int *srs = (const int*)sr_configs;
|
|
503
250
|
int pick_sr = 0;
|
|
504
251
|
for (int i = 0; i < sr_num; ++i) {
|
|
505
|
-
LOGI("encoder supported sample_rate[%d]=%d", i, srs[i]);
|
|
506
252
|
if (srs[i] == encCtx->sample_rate) { pick_sr = srs[i]; break; }
|
|
507
253
|
}
|
|
508
254
|
if (pick_sr == 0) pick_sr = srs[0];
|
|
@@ -511,10 +257,11 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
511
257
|
// libshine only supports 32000, 44100, 48000 Hz. Use outputSampleRateHz if valid (32000/44100/48000), else default 44100.
|
|
512
258
|
if (codec_id == AV_CODEC_ID_MP3) {
|
|
513
259
|
int want = (outputSampleRateHz == 32000 || outputSampleRateHz == 44100 || outputSampleRateHz == 48000) ? outputSampleRateHz : 44100;
|
|
514
|
-
if (encCtx->sample_rate != want)
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
260
|
+
if (encCtx->sample_rate != want) encCtx->sample_rate = want;
|
|
261
|
+
}
|
|
262
|
+
if (codec_id == AV_CODEC_ID_OPUS) {
|
|
263
|
+
int want = (outputSampleRateHz == 8000 || outputSampleRateHz == 12000 || outputSampleRateHz == 16000 || outputSampleRateHz == 24000 || outputSampleRateHz == 48000) ? outputSampleRateHz : 48000;
|
|
264
|
+
if (encCtx->sample_rate != want) encCtx->sample_rate = want;
|
|
518
265
|
}
|
|
519
266
|
|
|
520
267
|
// If supported channel layouts given, prefer matching channels else pick first
|
|
@@ -523,9 +270,6 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
523
270
|
int pick_nb = 0;
|
|
524
271
|
for (int i = 0; i < chl_num; ++i) {
|
|
525
272
|
const AVChannelLayout *l = &layouts[i];
|
|
526
|
-
char buf[128];
|
|
527
|
-
av_channel_layout_describe(l, buf, sizeof(buf));
|
|
528
|
-
LOGI("encoder supported ch_layout[%d]=%s nb_channels=%d", i, buf, l->nb_channels);
|
|
529
273
|
if (l->nb_channels == encCtx->ch_layout.nb_channels) { pick_nb = l->nb_channels; break; }
|
|
530
274
|
}
|
|
531
275
|
if (pick_nb == 0) pick_nb = layouts[0].nb_channels > 0 ? layouts[0].nb_channels : 1;
|
|
@@ -548,7 +292,7 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
548
292
|
}
|
|
549
293
|
|
|
550
294
|
// Set a sensible default bitrate for compressed codecs
|
|
551
|
-
if (codec_id == AV_CODEC_ID_MP3 || codec_id == AV_CODEC_ID_AAC) encCtx->bit_rate = 128000;
|
|
295
|
+
if (codec_id == AV_CODEC_ID_MP3 || codec_id == AV_CODEC_ID_AAC || codec_id == AV_CODEC_ID_OPUS) encCtx->bit_rate = 128000;
|
|
552
296
|
else encCtx->bit_rate = 0; // lossless or PCM may ignore
|
|
553
297
|
|
|
554
298
|
if (outFmt->oformat->flags & AVFMT_GLOBALHEADER) encCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
|
|
@@ -614,10 +358,10 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
614
358
|
}
|
|
615
359
|
}
|
|
616
360
|
|
|
617
|
-
// Last resort: try S16 then
|
|
361
|
+
// Last resort: try S16, S16P, then FLTP (for AAC etc.)
|
|
618
362
|
if (ret < 0) {
|
|
619
|
-
AVSampleFormat fallbacks[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P };
|
|
620
|
-
for (int fi = 0; fi <
|
|
363
|
+
AVSampleFormat fallbacks[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLTP };
|
|
364
|
+
for (int fi = 0; fi < 3 && ret < 0; ++fi) {
|
|
621
365
|
encCtx->sample_fmt = fallbacks[fi];
|
|
622
366
|
AVDictionary *try_opts = nullptr;
|
|
623
367
|
snprintf(tmpbuf, sizeof(tmpbuf), "%d", encCtx->ch_layout.nb_channels > 0 ? encCtx->ch_layout.nb_channels : 1);
|
|
@@ -742,73 +486,203 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
742
486
|
avformat_close_input(&inFmt);
|
|
743
487
|
return std::string("Failed to initialize resampler");
|
|
744
488
|
}
|
|
489
|
+
{
|
|
490
|
+
int initRet = swr_init(swr);
|
|
491
|
+
if (initRet < 0) {
|
|
492
|
+
char errbuf[256];
|
|
493
|
+
av_strerror(initRet, errbuf, sizeof(errbuf));
|
|
494
|
+
LOGE("convertToFormat: swr_init failed: %s", errbuf);
|
|
495
|
+
av_channel_layout_uninit(&in_ch_layout2);
|
|
496
|
+
swr_free(&swr);
|
|
497
|
+
av_channel_layout_uninit(&resampled->ch_layout);
|
|
498
|
+
av_frame_free(&frame);
|
|
499
|
+
av_frame_free(&resampled);
|
|
500
|
+
av_packet_free(&pkt);
|
|
501
|
+
avcodec_free_context(&encCtx);
|
|
502
|
+
avformat_free_context(outFmt);
|
|
503
|
+
avcodec_free_context(&decCtx);
|
|
504
|
+
avformat_close_input(&inFmt);
|
|
505
|
+
return std::string("Failed to initialize resampler (swr_init)");
|
|
506
|
+
}
|
|
507
|
+
}
|
|
745
508
|
av_channel_layout_uninit(&in_ch_layout2);
|
|
746
509
|
|
|
510
|
+
int totalDecodedFrames = 0;
|
|
511
|
+
int totalFramesSent = 0;
|
|
512
|
+
int totalPacketsFromEncoder = 0;
|
|
513
|
+
int flushPackets = 0;
|
|
514
|
+
int64_t encoder_pts = 0;
|
|
515
|
+
|
|
516
|
+
// Many encoders prefer / require a specific frame size (nb_samples) when using send_frame().
|
|
517
|
+
// MP3 (libshine) requires 1152 samples per frame.
|
|
518
|
+
// For others (e.g. FLAC), use encCtx->frame_size when available; otherwise use a conservative default.
|
|
519
|
+
const int default_frame_size = 1024;
|
|
520
|
+
const int enc_frame_size =
|
|
521
|
+
(codec_id == AV_CODEC_ID_MP3) ? 1152 :
|
|
522
|
+
(encCtx->frame_size > 0 ? encCtx->frame_size : default_frame_size);
|
|
523
|
+
int out_ch2 = encCtx->ch_layout.nb_channels;
|
|
524
|
+
if (out_ch2 <= 0) out_ch2 = 1;
|
|
525
|
+
int bytes_per_sample = av_get_bytes_per_sample(encCtx->sample_fmt);
|
|
526
|
+
|
|
527
|
+
// Accumulation buffer for resampled samples. Use read offset to avoid O(n²) memmove;
|
|
528
|
+
// compact only when offset exceeds threshold.
|
|
529
|
+
std::vector<uint8_t> accumBuf;
|
|
530
|
+
size_t accumReadOffset = 0; // bytes consumed from start (avoids O(n²) memmove)
|
|
531
|
+
const int bytesPerFrame = bytes_per_sample * out_ch2;
|
|
532
|
+
int accumSamples = 0;
|
|
533
|
+
|
|
534
|
+
const size_t kCompactThreshold = 256 * 1024; // compact when read offset exceeds 256 KB
|
|
535
|
+
|
|
536
|
+
auto maybeCompact = [&]() {
|
|
537
|
+
if (accumReadOffset == 0) return;
|
|
538
|
+
if (accumReadOffset < kCompactThreshold && accumReadOffset * 2 < accumBuf.size()) return;
|
|
539
|
+
size_t valid = accumBuf.size() - accumReadOffset;
|
|
540
|
+
if (valid > 0) memmove(accumBuf.data(), accumBuf.data() + accumReadOffset, valid);
|
|
541
|
+
accumBuf.resize(valid);
|
|
542
|
+
accumReadOffset = 0;
|
|
543
|
+
};
|
|
544
|
+
|
|
545
|
+
// Helper lambda: send frames of enc_frame_size samples from accumBuf to the encoder; when sendPartial is true, the remaining samples are sent as one final shorter frame.
|
|
546
|
+
auto flushAccumFrames = [&](bool sendPartial) {
|
|
547
|
+
int needed = enc_frame_size;
|
|
548
|
+
if (needed <= 0) return;
|
|
549
|
+
|
|
550
|
+
while (accumSamples >= needed || (sendPartial && accumSamples > 0)) {
|
|
551
|
+
int toSend = (accumSamples >= needed) ? needed : accumSamples;
|
|
552
|
+
AVFrame* ef = av_frame_alloc();
|
|
553
|
+
if (!ef) break;
|
|
554
|
+
ef->format = encCtx->sample_fmt;
|
|
555
|
+
ef->sample_rate = encCtx->sample_rate;
|
|
556
|
+
if (av_channel_layout_copy(&ef->ch_layout, &encCtx->ch_layout) < 0) { av_frame_free(&ef); break; }
|
|
557
|
+
ef->nb_samples = toSend;
|
|
558
|
+
if (av_frame_get_buffer(ef, 0) < 0) { av_channel_layout_uninit(&ef->ch_layout); av_frame_free(&ef); break; }
|
|
559
|
+
int copyBytes = toSend * bytesPerFrame;
|
|
560
|
+
memcpy(ef->data[0], accumBuf.data() + accumReadOffset, copyBytes);
|
|
561
|
+
ef->pts = encoder_pts;
|
|
562
|
+
encoder_pts += toSend;
|
|
563
|
+
|
|
564
|
+
accumReadOffset += (size_t)copyBytes;
|
|
565
|
+
accumSamples -= toSend;
|
|
566
|
+
|
|
567
|
+
// Send to encoder with EAGAIN handling
|
|
568
|
+
for (;;) {
|
|
569
|
+
int ret = avcodec_send_frame(encCtx, ef);
|
|
570
|
+
if (ret == 0) break;
|
|
571
|
+
if (ret == AVERROR(EAGAIN)) {
|
|
572
|
+
AVPacket* op = av_packet_alloc();
|
|
573
|
+
while (avcodec_receive_packet(encCtx, op) == 0) {
|
|
574
|
+
op->stream_index = outStream->index;
|
|
575
|
+
av_packet_rescale_ts(op, encCtx->time_base, outStream->time_base);
|
|
576
|
+
av_interleaved_write_frame(outFmt, op);
|
|
577
|
+
av_packet_unref(op);
|
|
578
|
+
totalPacketsFromEncoder++;
|
|
579
|
+
}
|
|
580
|
+
av_packet_free(&op);
|
|
581
|
+
continue;
|
|
582
|
+
}
|
|
583
|
+
LOGW("convertToFormat: send_frame ret=%d frame=%d pts=%lld nb=%d", ret, totalFramesSent, (long long)ef->pts, toSend);
|
|
584
|
+
break;
|
|
585
|
+
}
|
|
586
|
+
// Drain any ready packets
|
|
587
|
+
AVPacket* op = av_packet_alloc();
|
|
588
|
+
while (avcodec_receive_packet(encCtx, op) == 0) {
|
|
589
|
+
op->stream_index = outStream->index;
|
|
590
|
+
av_packet_rescale_ts(op, encCtx->time_base, outStream->time_base);
|
|
591
|
+
av_interleaved_write_frame(outFmt, op);
|
|
592
|
+
av_packet_unref(op);
|
|
593
|
+
totalPacketsFromEncoder++;
|
|
594
|
+
}
|
|
595
|
+
av_packet_free(&op);
|
|
596
|
+
|
|
597
|
+
av_channel_layout_uninit(&ef->ch_layout);
|
|
598
|
+
av_frame_free(&ef);
|
|
599
|
+
totalFramesSent++;
|
|
600
|
+
|
|
601
|
+
if (!sendPartial && accumSamples < needed) break;
|
|
602
|
+
}
|
|
603
|
+
};
|
|
604
|
+
|
|
747
605
|
while (av_read_frame(inFmt, pkt) >= 0) {
|
|
748
606
|
if (pkt->stream_index == audioStreamIndex) {
|
|
749
607
|
if (avcodec_send_packet(decCtx, pkt) == 0) {
|
|
750
608
|
while (avcodec_receive_frame(decCtx, frame) == 0) {
|
|
609
|
+
totalDecodedFrames++;
|
|
751
610
|
int in_sr2 = inStream->codecpar->sample_rate ? inStream->codecpar->sample_rate : decCtx->sample_rate;
|
|
752
611
|
int64_t out_nb_samples = av_rescale_rnd(swr_get_delay(swr, in_sr2) + frame->nb_samples, encCtx->sample_rate, in_sr2, AV_ROUND_UP);
|
|
753
612
|
uint8_t** outData = nullptr;
|
|
754
|
-
int out_ch2 = encCtx->ch_layout.nb_channels;
|
|
755
|
-
if (out_ch2 <= 0) out_ch2 = 1;
|
|
756
613
|
if (av_samples_alloc_array_and_samples(&outData, nullptr, out_ch2, (int)out_nb_samples, encCtx->sample_fmt, 0) < 0) {
|
|
757
614
|
av_packet_unref(pkt);
|
|
758
615
|
continue;
|
|
759
616
|
}
|
|
760
617
|
int converted = swr_convert(swr, outData, (int)out_nb_samples, (const uint8_t**)frame->data, frame->nb_samples);
|
|
761
|
-
if (converted
|
|
618
|
+
if (converted <= 0) {
|
|
762
619
|
av_freep(&outData[0]);
|
|
763
620
|
av_freep(&outData);
|
|
764
621
|
continue;
|
|
765
622
|
}
|
|
766
623
|
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
int copy_size2 = converted * bytes_per_sample * out_ch2;
|
|
775
|
-
memcpy(resampled->data[0], outData[0], copy_size2);
|
|
776
|
-
|
|
777
|
-
if (avcodec_send_frame(encCtx, resampled) == 0) {
|
|
778
|
-
AVPacket* outPkt = av_packet_alloc();
|
|
779
|
-
while (avcodec_receive_packet(encCtx, outPkt) == 0) {
|
|
780
|
-
outPkt->stream_index = outStream->index;
|
|
781
|
-
av_packet_rescale_ts(outPkt, encCtx->time_base, outStream->time_base);
|
|
782
|
-
av_interleaved_write_frame(outFmt, outPkt);
|
|
783
|
-
av_packet_unref(outPkt);
|
|
784
|
-
}
|
|
785
|
-
av_packet_free(&outPkt);
|
|
786
|
-
}
|
|
624
|
+
|
|
625
|
+
int newBytes = converted * bytes_per_sample * out_ch2;
|
|
626
|
+
maybeCompact();
|
|
627
|
+
size_t oldSize = accumBuf.size();
|
|
628
|
+
accumBuf.resize(oldSize + (size_t)newBytes);
|
|
629
|
+
memcpy(accumBuf.data() + oldSize, outData[0], (size_t)newBytes);
|
|
630
|
+
accumSamples += converted;
|
|
787
631
|
|
|
788
632
|
av_freep(&outData[0]);
|
|
789
633
|
av_freep(&outData);
|
|
790
|
-
av_frame_unref(resampled);
|
|
791
634
|
av_frame_unref(frame);
|
|
635
|
+
|
|
636
|
+
flushAccumFrames(false);
|
|
792
637
|
}
|
|
793
638
|
}
|
|
794
639
|
}
|
|
795
640
|
av_packet_unref(pkt);
|
|
796
641
|
}
|
|
797
642
|
|
|
643
|
+
// Drain any remaining samples in swr (resampler delay)
|
|
644
|
+
{
|
|
645
|
+
uint8_t** tailData = nullptr;
|
|
646
|
+
int tailCap = swr_get_delay(swr, encCtx->sample_rate) + 256;
|
|
647
|
+
if (tailCap > 0 && av_samples_alloc_array_and_samples(&tailData, nullptr, out_ch2, tailCap, encCtx->sample_fmt, 0) >= 0) {
|
|
648
|
+
int tailConverted = swr_convert(swr, tailData, tailCap, nullptr, 0);
|
|
649
|
+
if (tailConverted > 0) {
|
|
650
|
+
int tailBytes = tailConverted * bytes_per_sample * out_ch2;
|
|
651
|
+
maybeCompact();
|
|
652
|
+
size_t oldSize = accumBuf.size();
|
|
653
|
+
accumBuf.resize(oldSize + (size_t)tailBytes);
|
|
654
|
+
memcpy(accumBuf.data() + oldSize, tailData[0], (size_t)tailBytes);
|
|
655
|
+
accumSamples += tailConverted;
|
|
656
|
+
}
|
|
657
|
+
av_freep(&tailData[0]);
|
|
658
|
+
av_freep(&tailData);
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
// Send remaining (partial) frames
|
|
662
|
+
flushAccumFrames(true);
|
|
663
|
+
|
|
664
|
+
(void)totalDecodedFrames; (void)totalPacketsFromEncoder;
|
|
665
|
+
|
|
798
666
|
// Flush encoder
|
|
799
667
|
avcodec_send_frame(encCtx, nullptr);
|
|
800
668
|
AVPacket* outPkt2 = av_packet_alloc();
|
|
801
669
|
while (avcodec_receive_packet(encCtx, outPkt2) == 0) {
|
|
670
|
+
flushPackets++;
|
|
802
671
|
outPkt2->stream_index = outStream->index;
|
|
803
672
|
av_packet_rescale_ts(outPkt2, encCtx->time_base, outStream->time_base);
|
|
804
673
|
av_interleaved_write_frame(outFmt, outPkt2);
|
|
805
674
|
av_packet_unref(outPkt2);
|
|
806
675
|
}
|
|
807
676
|
av_packet_free(&outPkt2);
|
|
677
|
+
(void)flushPackets;
|
|
808
678
|
|
|
809
679
|
av_write_trailer(outFmt);
|
|
810
680
|
if (!(outFmt->oformat->flags & AVFMT_NOFILE)) avio_closep(&outFmt->pb);
|
|
811
681
|
|
|
682
|
+
struct stat stOut = {};
|
|
683
|
+
long outputSizeBytes = (stat(outputPath, &stOut) == 0 && S_ISREG(stOut.st_mode)) ? (long)stOut.st_size : -1;
|
|
684
|
+
LOGI("convertToFormat: done outputPath=%s outputSizeBytes=%ld", outputPath ? outputPath : "(null)", outputSizeBytes);
|
|
685
|
+
|
|
812
686
|
av_packet_free(&pkt);
|
|
813
687
|
av_frame_free(&frame);
|
|
814
688
|
av_channel_layout_uninit(&resampled->ch_layout);
|