webrtcvad 0.1.0 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/webrtcvad/extconf.rb +29 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/division_operations.c +141 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/dot_product_with_scale.h +40 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/energy.c +39 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/get_scaling_square.c +46 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/include/signal_processing_library.h +1605 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/include/spl_inl.h +153 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_48khz.c +186 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_by_2_internal.c +689 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_by_2_internal.h +60 -0
- data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_fractional.c +239 -0
- data/ext/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c +77 -0
- data/ext/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h +29 -0
- data/ext/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_mips.c +207 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/include/webrtc_vad.h +87 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_core.c +685 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_core.h +114 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_filterbank.c +329 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_filterbank.h +45 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_gmm.c +82 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_gmm.h +39 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_sp.c +176 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/vad_sp.h +54 -0
- data/ext/webrtcvad/webrtc/common_audio/vad/webrtc_vad.c +114 -0
- data/ext/webrtcvad/webrtc/rtc_base/checks.cc +207 -0
- data/ext/webrtcvad/webrtc/rtc_base/checks.h +400 -0
- data/ext/webrtcvad/webrtc/rtc_base/compile_assert_c.h +25 -0
- data/ext/webrtcvad/webrtc/rtc_base/numerics/safe_compare.h +176 -0
- data/ext/webrtcvad/webrtc/rtc_base/sanitizer.h +144 -0
- data/ext/webrtcvad/webrtc/rtc_base/system/inline.h +31 -0
- data/ext/webrtcvad/webrtc/rtc_base/system/rtc_export.h +43 -0
- data/ext/webrtcvad/webrtc/rtc_base/type_traits.h +140 -0
- data/ext/webrtcvad/webrtcvad.c +112 -0
- metadata +37 -3
@@ -0,0 +1,82 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
3
|
+
*
|
4
|
+
* Use of this source code is governed by a BSD-style license
|
5
|
+
* that can be found in the LICENSE file in the root of the source
|
6
|
+
* tree. An additional intellectual property rights grant can be found
|
7
|
+
* in the file PATENTS. All contributing project authors may
|
8
|
+
* be found in the AUTHORS file in the root of the source tree.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include "common_audio/vad/vad_gmm.h"
|
12
|
+
|
13
|
+
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
14
|
+
|
15
|
+
static const int32_t kCompVar = 22005;
|
16
|
+
static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12.
|
17
|
+
|
18
|
+
// For a normal distribution, the probability of |input| is calculated and
|
19
|
+
// returned (in Q20). The formula for normal distributed probability is
|
20
|
+
//
|
21
|
+
// 1 / s * exp(-(x - m)^2 / (2 * s^2))
|
22
|
+
//
|
23
|
+
// where the parameters are given in the following Q domains:
|
24
|
+
// m = |mean| (Q7)
|
25
|
+
// s = |std| (Q7)
|
26
|
+
// x = |input| (Q4)
|
27
|
+
// in addition to the probability we output |delta| (in Q11) used when updating
|
28
|
+
// the noise/speech model.
|
29
|
+
int32_t WebRtcVad_GaussianProbability(int16_t input,
|
30
|
+
int16_t mean,
|
31
|
+
int16_t std,
|
32
|
+
int16_t* delta) {
|
33
|
+
int16_t tmp16, inv_std, inv_std2, exp_value = 0;
|
34
|
+
int32_t tmp32;
|
35
|
+
|
36
|
+
// Calculate |inv_std| = 1 / s, in Q10.
|
37
|
+
// 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
|
38
|
+
// Q-domain: Q17 / Q7 = Q10.
|
39
|
+
tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
|
40
|
+
inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);
|
41
|
+
|
42
|
+
// Calculate |inv_std2| = 1 / s^2, in Q14.
|
43
|
+
tmp16 = (inv_std >> 2); // Q10 -> Q8.
|
44
|
+
// Q-domain: (Q8 * Q8) >> 2 = Q14.
|
45
|
+
inv_std2 = (int16_t)((tmp16 * tmp16) >> 2);
|
46
|
+
// TODO(bjornv): Investigate if changing to
|
47
|
+
// inv_std2 = (int16_t)((inv_std * inv_std) >> 6);
|
48
|
+
// gives better accuracy.
|
49
|
+
|
50
|
+
tmp16 = (input << 3); // Q4 -> Q7
|
51
|
+
tmp16 = tmp16 - mean; // Q7 - Q7 = Q7
|
52
|
+
|
53
|
+
// To be used later, when updating noise/speech model.
|
54
|
+
// |delta| = (x - m) / s^2, in Q11.
|
55
|
+
// Q-domain: (Q14 * Q7) >> 10 = Q11.
|
56
|
+
*delta = (int16_t)((inv_std2 * tmp16) >> 10);
|
57
|
+
|
58
|
+
// Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
|
59
|
+
// division by two with one shift.
|
60
|
+
// Q-domain: (Q11 * Q7) >> 8 = Q10.
|
61
|
+
tmp32 = (*delta * tmp16) >> 9;
|
62
|
+
|
63
|
+
// If the exponent is small enough to give a non-zero probability we calculate
|
64
|
+
// |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
|
65
|
+
// ~= exp2(-log2(exp(1)) * |tmp32|).
|
66
|
+
if (tmp32 < kCompVar) {
|
67
|
+
// Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
|
68
|
+
// Q-domain: (Q12 * Q10) >> 12 = Q10.
|
69
|
+
tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12);
|
70
|
+
tmp16 = -tmp16;
|
71
|
+
exp_value = (0x0400 | (tmp16 & 0x03FF));
|
72
|
+
tmp16 ^= 0xFFFF;
|
73
|
+
tmp16 >>= 10;
|
74
|
+
tmp16 += 1;
|
75
|
+
// Get |exp_value| = exp(-|tmp32|) in Q10.
|
76
|
+
exp_value >>= tmp16;
|
77
|
+
}
|
78
|
+
|
79
|
+
// Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20.
|
80
|
+
// Q-domain: Q10 * Q10 = Q20.
|
81
|
+
return inv_std * exp_value;
|
82
|
+
}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
3
|
+
*
|
4
|
+
* Use of this source code is governed by a BSD-style license
|
5
|
+
* that can be found in the LICENSE file in the root of the source
|
6
|
+
* tree. An additional intellectual property rights grant can be found
|
7
|
+
* in the file PATENTS. All contributing project authors may
|
8
|
+
* be found in the AUTHORS file in the root of the source tree.
|
9
|
+
*/
|
10
|
+
|
11
|
+
// Gaussian probability calculations internally used in vad_core.c.
|
12
|
+
|
13
|
+
#ifndef COMMON_AUDIO_VAD_VAD_GMM_H_
|
14
|
+
#define COMMON_AUDIO_VAD_VAD_GMM_H_
|
15
|
+
|
16
|
+
#include <stdint.h>
|
17
|
+
|
18
|
+
// Calculates the probability for |input|, given that |input| comes from a
|
19
|
+
// normal distribution with mean and standard deviation (|mean|, |std|).
|
20
|
+
//
|
21
|
+
// Inputs:
|
22
|
+
// - input : input sample in Q4.
|
23
|
+
// - mean : mean input in the statistical model, Q7.
|
24
|
+
// - std : standard deviation, Q7.
|
25
|
+
//
|
26
|
+
// Output:
|
27
|
+
//
|
28
|
+
// - delta : input used when updating the model, Q11.
|
29
|
+
// |delta| = (|input| - |mean|) / |std|^2.
|
30
|
+
//
|
31
|
+
// Return:
|
32
|
+
// (probability for |input|) =
|
33
|
+
// 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2));
|
34
|
+
int32_t WebRtcVad_GaussianProbability(int16_t input,
|
35
|
+
int16_t mean,
|
36
|
+
int16_t std,
|
37
|
+
int16_t* delta);
|
38
|
+
|
39
|
+
#endif // COMMON_AUDIO_VAD_VAD_GMM_H_
|
@@ -0,0 +1,176 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
3
|
+
*
|
4
|
+
* Use of this source code is governed by a BSD-style license
|
5
|
+
* that can be found in the LICENSE file in the root of the source
|
6
|
+
* tree. An additional intellectual property rights grant can be found
|
7
|
+
* in the file PATENTS. All contributing project authors may
|
8
|
+
* be found in the AUTHORS file in the root of the source tree.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include "common_audio/vad/vad_sp.h"
|
12
|
+
|
13
|
+
#include "rtc_base/checks.h"
|
14
|
+
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
15
|
+
#include "common_audio/vad/vad_core.h"
|
16
|
+
|
17
|
+
// Allpass filter coefficients, upper and lower, in Q13.
|
18
|
+
// Upper: 0.64, Lower: 0.17.
|
19
|
+
static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13.
|
20
|
+
static const int16_t kSmoothingDown = 6553; // 0.2 in Q15.
|
21
|
+
static const int16_t kSmoothingUp = 32439; // 0.99 in Q15.
|
22
|
+
|
23
|
+
// TODO(bjornv): Move this function to vad_filterbank.c.
|
24
|
+
// Downsampling filter based on splitting filter and allpass functions.
|
25
|
+
void WebRtcVad_Downsampling(const int16_t* signal_in,
|
26
|
+
int16_t* signal_out,
|
27
|
+
int32_t* filter_state,
|
28
|
+
size_t in_length) {
|
29
|
+
int16_t tmp16_1 = 0, tmp16_2 = 0;
|
30
|
+
int32_t tmp32_1 = filter_state[0];
|
31
|
+
int32_t tmp32_2 = filter_state[1];
|
32
|
+
size_t n = 0;
|
33
|
+
// Downsampling by 2 gives half length.
|
34
|
+
size_t half_length = (in_length >> 1);
|
35
|
+
|
36
|
+
// Filter coefficients in Q13, filter state in Q0.
|
37
|
+
for (n = 0; n < half_length; n++) {
|
38
|
+
// All-pass filtering upper branch.
|
39
|
+
tmp16_1 = (int16_t) ((tmp32_1 >> 1) +
|
40
|
+
((kAllPassCoefsQ13[0] * *signal_in) >> 14));
|
41
|
+
*signal_out = tmp16_1;
|
42
|
+
tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12);
|
43
|
+
|
44
|
+
// All-pass filtering lower branch.
|
45
|
+
tmp16_2 = (int16_t) ((tmp32_2 >> 1) +
|
46
|
+
((kAllPassCoefsQ13[1] * *signal_in) >> 14));
|
47
|
+
*signal_out++ += tmp16_2;
|
48
|
+
tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12);
|
49
|
+
}
|
50
|
+
// Store the filter states.
|
51
|
+
filter_state[0] = tmp32_1;
|
52
|
+
filter_state[1] = tmp32_2;
|
53
|
+
}
|
54
|
+
|
55
|
+
// Inserts |feature_value| into |low_value_vector|, if it is one of the 16
|
56
|
+
// smallest values the last 100 frames. Then calculates and returns the median
|
57
|
+
// of the five smallest values.
|
58
|
+
int16_t WebRtcVad_FindMinimum(VadInstT* self,
|
59
|
+
int16_t feature_value,
|
60
|
+
int channel) {
|
61
|
+
int i = 0, j = 0;
|
62
|
+
int position = -1;
|
63
|
+
// Offset to beginning of the 16 minimum values in memory.
|
64
|
+
const int offset = (channel << 4);
|
65
|
+
int16_t current_median = 1600;
|
66
|
+
int16_t alpha = 0;
|
67
|
+
int32_t tmp32 = 0;
|
68
|
+
// Pointer to memory for the 16 minimum values and the age of each value of
|
69
|
+
// the |channel|.
|
70
|
+
int16_t* age = &self->index_vector[offset];
|
71
|
+
int16_t* smallest_values = &self->low_value_vector[offset];
|
72
|
+
|
73
|
+
RTC_DCHECK_LT(channel, kNumChannels);
|
74
|
+
|
75
|
+
// Each value in |smallest_values| is getting 1 loop older. Update |age|, and
|
76
|
+
// remove old values.
|
77
|
+
for (i = 0; i < 16; i++) {
|
78
|
+
if (age[i] != 100) {
|
79
|
+
age[i]++;
|
80
|
+
} else {
|
81
|
+
// Too old value. Remove from memory and shift larger values downwards.
|
82
|
+
for (j = i; j < 15; j++) {
|
83
|
+
smallest_values[j] = smallest_values[j + 1];
|
84
|
+
age[j] = age[j + 1];
|
85
|
+
}
|
86
|
+
age[15] = 101;
|
87
|
+
smallest_values[15] = 10000;
|
88
|
+
}
|
89
|
+
}
|
90
|
+
|
91
|
+
// Check if |feature_value| is smaller than any of the values in
|
92
|
+
// |smallest_values|. If so, find the |position| where to insert the new value
|
93
|
+
// (|feature_value|).
|
94
|
+
if (feature_value < smallest_values[7]) {
|
95
|
+
if (feature_value < smallest_values[3]) {
|
96
|
+
if (feature_value < smallest_values[1]) {
|
97
|
+
if (feature_value < smallest_values[0]) {
|
98
|
+
position = 0;
|
99
|
+
} else {
|
100
|
+
position = 1;
|
101
|
+
}
|
102
|
+
} else if (feature_value < smallest_values[2]) {
|
103
|
+
position = 2;
|
104
|
+
} else {
|
105
|
+
position = 3;
|
106
|
+
}
|
107
|
+
} else if (feature_value < smallest_values[5]) {
|
108
|
+
if (feature_value < smallest_values[4]) {
|
109
|
+
position = 4;
|
110
|
+
} else {
|
111
|
+
position = 5;
|
112
|
+
}
|
113
|
+
} else if (feature_value < smallest_values[6]) {
|
114
|
+
position = 6;
|
115
|
+
} else {
|
116
|
+
position = 7;
|
117
|
+
}
|
118
|
+
} else if (feature_value < smallest_values[15]) {
|
119
|
+
if (feature_value < smallest_values[11]) {
|
120
|
+
if (feature_value < smallest_values[9]) {
|
121
|
+
if (feature_value < smallest_values[8]) {
|
122
|
+
position = 8;
|
123
|
+
} else {
|
124
|
+
position = 9;
|
125
|
+
}
|
126
|
+
} else if (feature_value < smallest_values[10]) {
|
127
|
+
position = 10;
|
128
|
+
} else {
|
129
|
+
position = 11;
|
130
|
+
}
|
131
|
+
} else if (feature_value < smallest_values[13]) {
|
132
|
+
if (feature_value < smallest_values[12]) {
|
133
|
+
position = 12;
|
134
|
+
} else {
|
135
|
+
position = 13;
|
136
|
+
}
|
137
|
+
} else if (feature_value < smallest_values[14]) {
|
138
|
+
position = 14;
|
139
|
+
} else {
|
140
|
+
position = 15;
|
141
|
+
}
|
142
|
+
}
|
143
|
+
|
144
|
+
// If we have detected a new small value, insert it at the correct position
|
145
|
+
// and shift larger values up.
|
146
|
+
if (position > -1) {
|
147
|
+
for (i = 15; i > position; i--) {
|
148
|
+
smallest_values[i] = smallest_values[i - 1];
|
149
|
+
age[i] = age[i - 1];
|
150
|
+
}
|
151
|
+
smallest_values[position] = feature_value;
|
152
|
+
age[position] = 1;
|
153
|
+
}
|
154
|
+
|
155
|
+
// Get |current_median|.
|
156
|
+
if (self->frame_counter > 2) {
|
157
|
+
current_median = smallest_values[2];
|
158
|
+
} else if (self->frame_counter > 0) {
|
159
|
+
current_median = smallest_values[0];
|
160
|
+
}
|
161
|
+
|
162
|
+
// Smooth the median value.
|
163
|
+
if (self->frame_counter > 0) {
|
164
|
+
if (current_median < self->mean_value[channel]) {
|
165
|
+
alpha = kSmoothingDown; // 0.2 in Q15.
|
166
|
+
} else {
|
167
|
+
alpha = kSmoothingUp; // 0.99 in Q15.
|
168
|
+
}
|
169
|
+
}
|
170
|
+
tmp32 = (alpha + 1) * self->mean_value[channel];
|
171
|
+
tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median;
|
172
|
+
tmp32 += 16384;
|
173
|
+
self->mean_value[channel] = (int16_t) (tmp32 >> 15);
|
174
|
+
|
175
|
+
return self->mean_value[channel];
|
176
|
+
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
3
|
+
*
|
4
|
+
* Use of this source code is governed by a BSD-style license
|
5
|
+
* that can be found in the LICENSE file in the root of the source
|
6
|
+
* tree. An additional intellectual property rights grant can be found
|
7
|
+
* in the file PATENTS. All contributing project authors may
|
8
|
+
* be found in the AUTHORS file in the root of the source tree.
|
9
|
+
*/
|
10
|
+
|
11
|
+
// This file includes specific signal processing tools used in vad_core.c.
|
12
|
+
|
13
|
+
#ifndef COMMON_AUDIO_VAD_VAD_SP_H_
|
14
|
+
#define COMMON_AUDIO_VAD_VAD_SP_H_
|
15
|
+
|
16
|
+
#include "common_audio/vad/vad_core.h"
|
17
|
+
|
18
|
+
// Downsamples the signal by a factor of 2, e.g. 32 kHz -> 16 kHz or 16 kHz -> 8 kHz.
|
19
|
+
//
|
20
|
+
// Inputs:
|
21
|
+
// - signal_in : Input signal.
|
22
|
+
// - in_length : Length of input signal in samples.
|
23
|
+
//
|
24
|
+
// Input & Output:
|
25
|
+
// - filter_state : Current filter states of the two all-pass filters. The
|
26
|
+
// |filter_state| is updated after all samples have been
|
27
|
+
// processed.
|
28
|
+
//
|
29
|
+
// Output:
|
30
|
+
// - signal_out : Downsampled signal (of length |in_length| / 2).
|
31
|
+
void WebRtcVad_Downsampling(const int16_t* signal_in,
|
32
|
+
int16_t* signal_out,
|
33
|
+
int32_t* filter_state,
|
34
|
+
size_t in_length);
|
35
|
+
|
36
|
+
// Updates and returns the smoothed feature minimum. As minimum we use the
|
37
|
+
// median of the five smallest feature values in a 100 frames long window.
|
38
|
+
// As long as |handle->frame_counter| is zero, that is, we haven't received any
|
39
|
+
// "valid" data, FindMinimum() outputs the default value of 1600.
|
40
|
+
//
|
41
|
+
// Inputs:
|
42
|
+
// - feature_value : New feature value to update with.
|
43
|
+
// - channel : Channel number.
|
44
|
+
//
|
45
|
+
// Input & Output:
|
46
|
+
// - handle : State information of the VAD.
|
47
|
+
//
|
48
|
+
// Returns:
|
49
|
+
// : Smoothed minimum value for a moving window.
|
50
|
+
int16_t WebRtcVad_FindMinimum(VadInstT* handle,
|
51
|
+
int16_t feature_value,
|
52
|
+
int channel);
|
53
|
+
|
54
|
+
#endif // COMMON_AUDIO_VAD_VAD_SP_H_
|
@@ -0,0 +1,114 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
3
|
+
*
|
4
|
+
* Use of this source code is governed by a BSD-style license
|
5
|
+
* that can be found in the LICENSE file in the root of the source
|
6
|
+
* tree. An additional intellectual property rights grant can be found
|
7
|
+
* in the file PATENTS. All contributing project authors may
|
8
|
+
* be found in the AUTHORS file in the root of the source tree.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include "common_audio/vad/include/webrtc_vad.h"
|
12
|
+
|
13
|
+
#include <stdlib.h>
|
14
|
+
#include <string.h>
|
15
|
+
|
16
|
+
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
17
|
+
#include "common_audio/vad/vad_core.h"
|
18
|
+
|
19
|
+
static const int kInitCheck = 42;
|
20
|
+
static const int kValidRates[] = { 8000, 16000, 32000, 48000 };
|
21
|
+
static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);
|
22
|
+
static const int kMaxFrameLengthMs = 30;
|
23
|
+
|
24
|
+
VadInst* WebRtcVad_Create() {
|
25
|
+
VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
|
26
|
+
|
27
|
+
self->init_flag = 0;
|
28
|
+
|
29
|
+
return (VadInst*)self;
|
30
|
+
}
|
31
|
+
|
32
|
+
// Releases the memory of |handle| by passing it to free(); a NULL |handle| is
// therefore a no-op.
void WebRtcVad_Free(VadInst* handle) {
  free(handle);
}
|
35
|
+
|
36
|
+
// TODO(bjornv): Move WebRtcVad_InitCore() code here.
|
37
|
+
int WebRtcVad_Init(VadInst* handle) {
|
38
|
+
// Initialize the core VAD component.
|
39
|
+
return WebRtcVad_InitCore((VadInstT*) handle);
|
40
|
+
}
|
41
|
+
|
42
|
+
// TODO(bjornv): Move WebRtcVad_set_mode_core() code here.
|
43
|
+
int WebRtcVad_set_mode(VadInst* handle, int mode) {
|
44
|
+
VadInstT* self = (VadInstT*) handle;
|
45
|
+
|
46
|
+
if (handle == NULL) {
|
47
|
+
return -1;
|
48
|
+
}
|
49
|
+
if (self->init_flag != kInitCheck) {
|
50
|
+
return -1;
|
51
|
+
}
|
52
|
+
|
53
|
+
return WebRtcVad_set_mode_core(self, mode);
|
54
|
+
}
|
55
|
+
|
56
|
+
// Runs the VAD on one frame of audio.
//
// Returns -1 if |handle| is NULL, if its |init_flag| does not equal
// kInitCheck, if |audio_frame| is NULL, or if the pair (|fs|, |frame_length|)
// is rejected by WebRtcVad_ValidRateAndFrameLength(). Otherwise the result of
// the rate-specific WebRtcVad_CalcVad*khz() routine is returned, with every
// positive value collapsed to 1.
int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
                      size_t frame_length) {
  VadInstT* const self = (VadInstT*) handle;
  int decision = -1;

  // Argument checks, in order: instance, initialization state, audio buffer.
  if (handle == NULL || self->init_flag != kInitCheck || audio_frame == NULL) {
    return -1;
  }
  if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) {
    return -1;
  }

  // Dispatch on the sample rate; an unmatched rate leaves |decision| at -1.
  switch (fs) {
    case 48000:
      decision = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length);
      break;
    case 32000:
      decision = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length);
      break;
    case 16000:
      decision = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length);
      break;
    case 8000:
      decision = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length);
      break;
    default:
      break;
  }

  return (decision > 0) ? 1 : decision;
}
|
90
|
+
|
91
|
+
int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) {
|
92
|
+
int return_value = -1;
|
93
|
+
size_t i;
|
94
|
+
int valid_length_ms;
|
95
|
+
size_t valid_length;
|
96
|
+
|
97
|
+
// We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and
|
98
|
+
// see if we have a matching pair.
|
99
|
+
for (i = 0; i < kRatesSize; i++) {
|
100
|
+
if (kValidRates[i] == rate) {
|
101
|
+
for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs;
|
102
|
+
valid_length_ms += 10) {
|
103
|
+
valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms);
|
104
|
+
if (frame_length == valid_length) {
|
105
|
+
return_value = 0;
|
106
|
+
break;
|
107
|
+
}
|
108
|
+
}
|
109
|
+
break;
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
return return_value;
|
114
|
+
}
|