webrtcvad 0.1.0 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/ext/webrtcvad/extconf.rb +29 -0
  3. data/ext/webrtcvad/webrtc/common_audio/signal_processing/division_operations.c +141 -0
  4. data/ext/webrtcvad/webrtc/common_audio/signal_processing/dot_product_with_scale.h +40 -0
  5. data/ext/webrtcvad/webrtc/common_audio/signal_processing/energy.c +39 -0
  6. data/ext/webrtcvad/webrtc/common_audio/signal_processing/get_scaling_square.c +46 -0
  7. data/ext/webrtcvad/webrtc/common_audio/signal_processing/include/signal_processing_library.h +1605 -0
  8. data/ext/webrtcvad/webrtc/common_audio/signal_processing/include/spl_inl.h +153 -0
  9. data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_48khz.c +186 -0
  10. data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_by_2_internal.c +689 -0
  11. data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_by_2_internal.h +60 -0
  12. data/ext/webrtcvad/webrtc/common_audio/signal_processing/resample_fractional.c +239 -0
  13. data/ext/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c +77 -0
  14. data/ext/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h +29 -0
  15. data/ext/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_mips.c +207 -0
  16. data/ext/webrtcvad/webrtc/common_audio/vad/include/webrtc_vad.h +87 -0
  17. data/ext/webrtcvad/webrtc/common_audio/vad/vad_core.c +685 -0
  18. data/ext/webrtcvad/webrtc/common_audio/vad/vad_core.h +114 -0
  19. data/ext/webrtcvad/webrtc/common_audio/vad/vad_filterbank.c +329 -0
  20. data/ext/webrtcvad/webrtc/common_audio/vad/vad_filterbank.h +45 -0
  21. data/ext/webrtcvad/webrtc/common_audio/vad/vad_gmm.c +82 -0
  22. data/ext/webrtcvad/webrtc/common_audio/vad/vad_gmm.h +39 -0
  23. data/ext/webrtcvad/webrtc/common_audio/vad/vad_sp.c +176 -0
  24. data/ext/webrtcvad/webrtc/common_audio/vad/vad_sp.h +54 -0
  25. data/ext/webrtcvad/webrtc/common_audio/vad/webrtc_vad.c +114 -0
  26. data/ext/webrtcvad/webrtc/rtc_base/checks.cc +207 -0
  27. data/ext/webrtcvad/webrtc/rtc_base/checks.h +400 -0
  28. data/ext/webrtcvad/webrtc/rtc_base/compile_assert_c.h +25 -0
  29. data/ext/webrtcvad/webrtc/rtc_base/numerics/safe_compare.h +176 -0
  30. data/ext/webrtcvad/webrtc/rtc_base/sanitizer.h +144 -0
  31. data/ext/webrtcvad/webrtc/rtc_base/system/inline.h +31 -0
  32. data/ext/webrtcvad/webrtc/rtc_base/system/rtc_export.h +43 -0
  33. data/ext/webrtcvad/webrtc/rtc_base/type_traits.h +140 -0
  34. data/ext/webrtcvad/webrtcvad.c +112 -0
  35. metadata +37 -3
@@ -0,0 +1,82 @@
1
+ /*
2
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3
+ *
4
+ * Use of this source code is governed by a BSD-style license
5
+ * that can be found in the LICENSE file in the root of the source
6
+ * tree. An additional intellectual property rights grant can be found
7
+ * in the file PATENTS. All contributing project authors may
8
+ * be found in the AUTHORS file in the root of the source tree.
9
+ */
10
+
11
+ #include "common_audio/vad/vad_gmm.h"
12
+
13
+ #include "common_audio/signal_processing/include/signal_processing_library.h"
14
+
15
// Exponents (in Q10) at or above this limit leave the probability at zero.
static const int32_t kCompVar = 22005;
static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.

// Evaluates, in fixed point, the (unnormalized) normal density
//
//   1 / s * exp(-(x - m)^2 / (2 * s^2))
//
// and returns it in Q20. The arguments use these Q domains:
//   x = |input| (Q4)
//   m = |mean|  (Q7)
//   s = |std|   (Q7)
// As a side product |delta| = (x - m) / s^2 is written in Q11; the comments
// in this file say it is used when updating the noise/speech model.
int32_t WebRtcVad_GaussianProbability(int16_t input,
                                      int16_t mean,
                                      int16_t std,
                                      int16_t* delta) {
  int16_t inv_std_q10 = 0;
  int16_t inv_std_q8 = 0;
  int16_t inv_var_q14 = 0;
  int16_t deviation_q7 = 0;
  int16_t delta_q11 = 0;
  int16_t exponent_log2 = 0;
  int16_t shift = 0;
  int16_t exp_q10 = 0;
  int32_t numerator_q17 = 0;
  int32_t exponent_q10 = 0;

  // 1 / s in Q10: Q17 / Q7 = Q10. 131072 is 1.0 in Q17; adding half of |std|
  // before dividing rounds the quotient instead of truncating it.
  numerator_q17 = (int32_t) 131072 + (int32_t) (std >> 1);
  inv_std_q10 = (int16_t) WebRtcSpl_DivW32W16(numerator_q17, std);

  // 1 / s^2 in Q14: drop to Q8 first, then (Q8 * Q8) >> 2 = Q14.
  // TODO(bjornv): Investigate if changing to
  // inv_var_q14 = (int16_t)((inv_std_q10 * inv_std_q10) >> 6);
  // gives better accuracy.
  inv_std_q8 = (int16_t) (inv_std_q10 >> 2);
  inv_var_q14 = (int16_t) ((inv_std_q8 * inv_std_q8) >> 2);

  // x - m in Q7; the input is first lifted from Q4 to Q7.
  deviation_q7 = (int16_t) (input << 3);
  deviation_q7 = (int16_t) (deviation_q7 - mean);

  // (x - m) / s^2 in Q11: (Q14 * Q7) >> 10 = Q11. Handed back to the caller.
  delta_q11 = (int16_t) ((inv_var_q14 * deviation_q7) >> 10);
  *delta = delta_q11;

  // (x - m)^2 / (2 * s^2) in Q10: (Q11 * Q7) >> 8 = Q10, with one extra shift
  // standing in for the division by two.
  exponent_q10 = (delta_q11 * deviation_q7) >> 9;

  // Only exponents below the limit yield a non-zero result. For those,
  //   exp(-e) = exp2(-log2(exp(1)) * e)
  // is approximated from the bits of the scaled exponent.
  if (exponent_q10 < kCompVar) {
    // log2(exp(1)) * e in Q10: (Q12 * Q10) >> 12 = Q10, then negated.
    exponent_log2 = (int16_t) ((kLog2Exp * exponent_q10) >> 12);
    exponent_log2 = (int16_t) (-exponent_log2);
    // The low 10 bits become a Q10 mantissa with the 1.0 bit (0x0400) set.
    exp_q10 = (int16_t) (0x0400 | (exponent_log2 & 0x03FF));
    // The remaining high bits, inverted and offset by one, give the number of
    // positions to shift the mantissa right.
    shift = (int16_t) (exponent_log2 ^ 0xFFFF);
    shift >>= 10;
    shift += 1;
    exp_q10 >>= shift;
  }

  // (1 / s) * exp(...): Q10 * Q10 = Q20.
  return inv_std_q10 * exp_q10;
}
@@ -0,0 +1,39 @@
1
+ /*
2
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3
+ *
4
+ * Use of this source code is governed by a BSD-style license
5
+ * that can be found in the LICENSE file in the root of the source
6
+ * tree. An additional intellectual property rights grant can be found
7
+ * in the file PATENTS. All contributing project authors may
8
+ * be found in the AUTHORS file in the root of the source tree.
9
+ */
10
+
11
+ // Gaussian probability calculations internally used in vad_core.c.
12
+
13
+ #ifndef COMMON_AUDIO_VAD_VAD_GMM_H_
14
+ #define COMMON_AUDIO_VAD_VAD_GMM_H_
15
+
16
+ #include <stdint.h>
17
+
18
+ // Calculates the probability for |input|, given that |input| comes from a
19
+ // normal distribution with mean and standard deviation (|mean|, |std|).
20
+ //
21
+ // Inputs:
22
+ // - input : input sample in Q4.
23
+ // - mean : mean input in the statistical model, Q7.
24
+ // - std : standard deviation, Q7.
25
+ //
26
+ // Output:
27
+ //
28
+ // - delta : input used when updating the model, Q11.
29
+ // |delta| = (|input| - |mean|) / |std|^2.
30
+ //
31
+ // Return:
32
+ // (probability for |input|) =
33
+ // 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2)), in Q20.
34
+ int32_t WebRtcVad_GaussianProbability(int16_t input,
35
+ int16_t mean,
36
+ int16_t std,
37
+ int16_t* delta);
38
+
39
+ #endif // COMMON_AUDIO_VAD_VAD_GMM_H_
@@ -0,0 +1,176 @@
1
+ /*
2
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3
+ *
4
+ * Use of this source code is governed by a BSD-style license
5
+ * that can be found in the LICENSE file in the root of the source
6
+ * tree. An additional intellectual property rights grant can be found
7
+ * in the file PATENTS. All contributing project authors may
8
+ * be found in the AUTHORS file in the root of the source tree.
9
+ */
10
+
11
+ #include "common_audio/vad/vad_sp.h"
12
+
13
+ #include "rtc_base/checks.h"
14
+ #include "common_audio/signal_processing/include/signal_processing_library.h"
15
+ #include "common_audio/vad/vad_core.h"
16
+
17
// All-pass filter coefficients in Q13: index 0 is the upper branch (0.64),
// index 1 the lower branch (0.17).
static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 };  // Q13.
static const int16_t kSmoothingDown = 6553;  // 0.2 in Q15.
static const int16_t kSmoothingUp = 32439;  // 0.99 in Q15.

// TODO(bjornv): Move this function to vad_filterbank.c.
// Halves the sample count of |signal_in| using two all-pass branches.
//
// Input samples are consumed in pairs: the first sample of each pair feeds the
// upper branch, the second feeds the lower branch, and the two branch outputs
// are summed into a single output sample. A trailing unpaired sample (odd
// |in_length|) is ignored.
//
// - signal_in    : |in_length| input samples.
// - signal_out   : receives |in_length| / 2 samples.
// - filter_state : two-element state, [0] upper branch and [1] lower branch;
//                  read on entry and written back once after all pairs have
//                  been processed.
// - in_length    : number of input samples.
void WebRtcVad_Downsampling(const int16_t* signal_in,
                            int16_t* signal_out,
                            int32_t* filter_state,
                            size_t in_length) {
  const size_t out_length = (in_length >> 1);
  int32_t upper_state = filter_state[0];
  int32_t lower_state = filter_state[1];
  size_t k = 0;

  // Coefficients are in Q13, the filter states in Q0.
  for (k = 0; k < out_length; k++) {
    const size_t even = 2 * k;
    const size_t odd = even + 1;
    int16_t upper_out = 0;
    int16_t lower_out = 0;

    // Upper branch: all-pass filter on the first sample of the pair.
    upper_out = (int16_t) ((upper_state >> 1) +
        ((kAllPassCoefsQ13[0] * signal_in[even]) >> 14));
    signal_out[k] = upper_out;
    upper_state = (int32_t) signal_in[even] -
        ((kAllPassCoefsQ13[0] * upper_out) >> 12);

    // Lower branch: all-pass filter on the second sample, added on top.
    lower_out = (int16_t) ((lower_state >> 1) +
        ((kAllPassCoefsQ13[1] * signal_in[odd]) >> 14));
    signal_out[k] += lower_out;
    lower_state = (int32_t) signal_in[odd] -
        ((kAllPassCoefsQ13[1] * lower_out) >> 12);
  }

  // Persist the branch states for the next call.
  filter_state[0] = upper_state;
  filter_state[1] = lower_state;
}
54
+
55
+ // Inserts |feature_value| into |low_value_vector|, if it is one of the 16
56
+ // smallest values the last 100 frames. Then calculates and returns the median
57
+ // of the five smallest values.
58
+ int16_t WebRtcVad_FindMinimum(VadInstT* self,
59
+ int16_t feature_value,
60
+ int channel) {
61
+ int i = 0, j = 0;
62
+ int position = -1;
63
+ // Offset to beginning of the 16 minimum values in memory.
64
+ const int offset = (channel << 4);
65
+ int16_t current_median = 1600;
66
+ int16_t alpha = 0;
67
+ int32_t tmp32 = 0;
68
+ // Pointer to memory for the 16 minimum values and the age of each value of
69
+ // the |channel|.
70
+ int16_t* age = &self->index_vector[offset];
71
+ int16_t* smallest_values = &self->low_value_vector[offset];
72
+
73
+ RTC_DCHECK_LT(channel, kNumChannels);
74
+
75
+ // Each value in |smallest_values| is getting 1 loop older. Update |age|, and
76
+ // remove old values.
77
+ for (i = 0; i < 16; i++) {
78
+ if (age[i] != 100) {
79
+ age[i]++;
80
+ } else {
81
+ // Too old value. Remove from memory and shift larger values downwards.
82
+ for (j = i; j < 15; j++) {
83
+ smallest_values[j] = smallest_values[j + 1];
84
+ age[j] = age[j + 1];
85
+ }
86
+ age[15] = 101;
87
+ smallest_values[15] = 10000;
88
+ }
89
+ }
90
+
91
+ // Check if |feature_value| is smaller than any of the values in
92
+ // |smallest_values|. If so, find the |position| where to insert the new value
93
+ // (|feature_value|).
94
+ if (feature_value < smallest_values[7]) {
95
+ if (feature_value < smallest_values[3]) {
96
+ if (feature_value < smallest_values[1]) {
97
+ if (feature_value < smallest_values[0]) {
98
+ position = 0;
99
+ } else {
100
+ position = 1;
101
+ }
102
+ } else if (feature_value < smallest_values[2]) {
103
+ position = 2;
104
+ } else {
105
+ position = 3;
106
+ }
107
+ } else if (feature_value < smallest_values[5]) {
108
+ if (feature_value < smallest_values[4]) {
109
+ position = 4;
110
+ } else {
111
+ position = 5;
112
+ }
113
+ } else if (feature_value < smallest_values[6]) {
114
+ position = 6;
115
+ } else {
116
+ position = 7;
117
+ }
118
+ } else if (feature_value < smallest_values[15]) {
119
+ if (feature_value < smallest_values[11]) {
120
+ if (feature_value < smallest_values[9]) {
121
+ if (feature_value < smallest_values[8]) {
122
+ position = 8;
123
+ } else {
124
+ position = 9;
125
+ }
126
+ } else if (feature_value < smallest_values[10]) {
127
+ position = 10;
128
+ } else {
129
+ position = 11;
130
+ }
131
+ } else if (feature_value < smallest_values[13]) {
132
+ if (feature_value < smallest_values[12]) {
133
+ position = 12;
134
+ } else {
135
+ position = 13;
136
+ }
137
+ } else if (feature_value < smallest_values[14]) {
138
+ position = 14;
139
+ } else {
140
+ position = 15;
141
+ }
142
+ }
143
+
144
+ // If we have detected a new small value, insert it at the correct position
145
+ // and shift larger values up.
146
+ if (position > -1) {
147
+ for (i = 15; i > position; i--) {
148
+ smallest_values[i] = smallest_values[i - 1];
149
+ age[i] = age[i - 1];
150
+ }
151
+ smallest_values[position] = feature_value;
152
+ age[position] = 1;
153
+ }
154
+
155
+ // Get |current_median|.
156
+ if (self->frame_counter > 2) {
157
+ current_median = smallest_values[2];
158
+ } else if (self->frame_counter > 0) {
159
+ current_median = smallest_values[0];
160
+ }
161
+
162
+ // Smooth the median value.
163
+ if (self->frame_counter > 0) {
164
+ if (current_median < self->mean_value[channel]) {
165
+ alpha = kSmoothingDown; // 0.2 in Q15.
166
+ } else {
167
+ alpha = kSmoothingUp; // 0.99 in Q15.
168
+ }
169
+ }
170
+ tmp32 = (alpha + 1) * self->mean_value[channel];
171
+ tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median;
172
+ tmp32 += 16384;
173
+ self->mean_value[channel] = (int16_t) (tmp32 >> 15);
174
+
175
+ return self->mean_value[channel];
176
+ }
@@ -0,0 +1,54 @@
1
+ /*
2
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3
+ *
4
+ * Use of this source code is governed by a BSD-style license
5
+ * that can be found in the LICENSE file in the root of the source
6
+ * tree. An additional intellectual property rights grant can be found
7
+ * in the file PATENTS. All contributing project authors may
8
+ * be found in the AUTHORS file in the root of the source tree.
9
+ */
10
+
11
+ // This file includes specific signal processing tools used in vad_core.c.
12
+
13
+ #ifndef COMMON_AUDIO_VAD_VAD_SP_H_
14
+ #define COMMON_AUDIO_VAD_VAD_SP_H_
15
+
16
+ #include "common_audio/vad/vad_core.h"
17
+
18
+ // Downsamples the signal by a factor of 2, e.g. 32 kHz -> 16 kHz or 16 kHz -> 8 kHz.
19
+ //
20
+ // Inputs:
21
+ // - signal_in : Input signal.
22
+ // - in_length : Length of input signal in samples.
23
+ //
24
+ // Input & Output:
25
+ // - filter_state : Current filter states of the two all-pass filters. The
26
+ // |filter_state| is updated after all samples have been
27
+ // processed.
28
+ //
29
+ // Output:
30
+ // - signal_out : Downsampled signal (of length |in_length| / 2).
31
+ void WebRtcVad_Downsampling(const int16_t* signal_in,
32
+ int16_t* signal_out,
33
+ int32_t* filter_state,
34
+ size_t in_length);
35
+
36
+ // Updates and returns the smoothed feature minimum. As minimum we use the
37
+ // median of the five smallest feature values in a 100 frames long window.
38
+ // As long as |handle->frame_counter| is zero, that is, we haven't received any
39
+ // "valid" data, FindMinimum() outputs the default value of 1600.
40
+ //
41
+ // Inputs:
42
+ // - feature_value : New feature value to update with.
43
+ // - channel : Channel number.
44
+ //
45
+ // Input & Output:
46
+ // - handle : State information of the VAD.
47
+ //
48
+ // Returns:
49
+ // : Smoothed minimum value for a moving window.
50
+ int16_t WebRtcVad_FindMinimum(VadInstT* handle,
51
+ int16_t feature_value,
52
+ int channel);
53
+
54
+ #endif // COMMON_AUDIO_VAD_VAD_SP_H_
@@ -0,0 +1,114 @@
1
+ /*
2
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3
+ *
4
+ * Use of this source code is governed by a BSD-style license
5
+ * that can be found in the LICENSE file in the root of the source
6
+ * tree. An additional intellectual property rights grant can be found
7
+ * in the file PATENTS. All contributing project authors may
8
+ * be found in the AUTHORS file in the root of the source tree.
9
+ */
10
+
11
+ #include "common_audio/vad/include/webrtc_vad.h"
12
+
13
+ #include <stdlib.h>
14
+ #include <string.h>
15
+
16
+ #include "common_audio/signal_processing/include/signal_processing_library.h"
17
+ #include "common_audio/vad/vad_core.h"
18
+
19
// Value |init_flag| must hold before WebRtcVad_set_mode() and
// WebRtcVad_Process() accept an instance.
static const int kInitCheck = 42;
// Sample rates accepted by WebRtcVad_ValidRateAndFrameLength().
static const int kValidRates[] = { 8000, 16000, 32000, 48000 };
static const size_t kRatesSize = sizeof(kValidRates) / sizeof(kValidRates[0]);
// Longest accepted frame, in milliseconds.
static const int kMaxFrameLengthMs = 30;
23
+
24
+ VadInst* WebRtcVad_Create() {
25
+ VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
26
+
27
+ self->init_flag = 0;
28
+
29
+ return (VadInst*)self;
30
+ }
31
+
32
+ void WebRtcVad_Free(VadInst* handle) {
33
+ free(handle);
34
+ }
35
+
36
+ // TODO(bjornv): Move WebRtcVad_InitCore() code here.
37
+ int WebRtcVad_Init(VadInst* handle) {
38
+ // Initialize the core VAD component.
39
+ return WebRtcVad_InitCore((VadInstT*) handle);
40
+ }
41
+
42
+ // TODO(bjornv): Move WebRtcVad_set_mode_core() code here.
43
+ int WebRtcVad_set_mode(VadInst* handle, int mode) {
44
+ VadInstT* self = (VadInstT*) handle;
45
+
46
+ if (handle == NULL) {
47
+ return -1;
48
+ }
49
+ if (self->init_flag != kInitCheck) {
50
+ return -1;
51
+ }
52
+
53
+ return WebRtcVad_set_mode_core(self, mode);
54
+ }
55
+
56
+ int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
57
+ size_t frame_length) {
58
+ int vad = -1;
59
+ VadInstT* self = (VadInstT*) handle;
60
+
61
+ if (handle == NULL) {
62
+ return -1;
63
+ }
64
+
65
+ if (self->init_flag != kInitCheck) {
66
+ return -1;
67
+ }
68
+ if (audio_frame == NULL) {
69
+ return -1;
70
+ }
71
+ if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) {
72
+ return -1;
73
+ }
74
+
75
+ if (fs == 48000) {
76
+ vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length);
77
+ } else if (fs == 32000) {
78
+ vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length);
79
+ } else if (fs == 16000) {
80
+ vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length);
81
+ } else if (fs == 8000) {
82
+ vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length);
83
+ }
84
+
85
+ if (vad > 0) {
86
+ vad = 1;
87
+ }
88
+ return vad;
89
+ }
90
+
91
+ int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) {
92
+ int return_value = -1;
93
+ size_t i;
94
+ int valid_length_ms;
95
+ size_t valid_length;
96
+
97
+ // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and
98
+ // see if we have a matching pair.
99
+ for (i = 0; i < kRatesSize; i++) {
100
+ if (kValidRates[i] == rate) {
101
+ for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs;
102
+ valid_length_ms += 10) {
103
+ valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms);
104
+ if (frame_length == valid_length) {
105
+ return_value = 0;
106
+ break;
107
+ }
108
+ }
109
+ break;
110
+ }
111
+ }
112
+
113
+ return return_value;
114
+ }