noyes 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/c_impl/array_list.c +64 -0
- data/lib/c_impl/bent_cent_marker.c +36 -0
- data/lib/c_impl/discrete_cosine_transform.c +16 -0
- data/lib/c_impl/n_array_list.c +72 -0
- data/lib/c_impl/n_array_list.h +18 -0
- data/lib/c_impl/n_bent_cent_marker.c +48 -0
- data/lib/c_impl/n_speech_trimmer.c +67 -0
- data/lib/c_impl/noyes.h +36 -0
- data/lib/c_impl/noyes_c.c +3 -0
- data/lib/c_impl/rnoyes.h +2 -0
- data/lib/c_impl/speech_trimmer.c +52 -0
- data/lib/common/ruby_ext.rb +19 -0
- data/lib/common/send_incrementally.rb +0 -1
- data/lib/common.rb +1 -0
- data/lib/java_impl/bent_cent_marker.rb +13 -0
- data/lib/java_impl/speech_trimmer.rb +18 -0
- data/lib/noyes.rb +2 -0
- data/lib/noyes_java.rb +2 -0
- data/lib/ruby_impl/bent_cent_marker.rb +40 -0
- data/lib/ruby_impl/speech_trimmer.rb +54 -0
- data/ship/noyes.jar +0 -0
- metadata +14 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.9.0
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "noyes.h"
|
3
|
+
#include "rnoyes.h"
|
4
|
+
|
5
|
+
static int id_push;
|
6
|
+
|
7
|
+
VALUE cArrayList;
|
8
|
+
|
9
|
+
static void free_n_list(void *p) {
|
10
|
+
n_list_free(p);
|
11
|
+
}
|
12
|
+
|
13
|
+
static VALUE t_init(VALUE self) {
|
14
|
+
NList *st = n_list_new();
|
15
|
+
VALUE stv = Data_Wrap_Struct(cArrayList, 0, free_n_list, st);
|
16
|
+
rb_iv_set(self, "@n_list", stv);
|
17
|
+
return self;
|
18
|
+
}
|
19
|
+
|
20
|
+
static VALUE t_size(VALUE self) {
|
21
|
+
NList *array;
|
22
|
+
VALUE arrayv = rb_iv_get(self, "@n_list");
|
23
|
+
Data_Get_Struct(arrayv, NList, array);
|
24
|
+
return INT2FIX(n_list_size(array));
|
25
|
+
}
|
26
|
+
|
27
|
+
static VALUE t_add(VALUE self, VALUE obj) {
|
28
|
+
NList *array;
|
29
|
+
VALUE arrayv = rb_iv_get(self, "@n_list");
|
30
|
+
Data_Get_Struct(arrayv, NList, array);
|
31
|
+
n_list_add(array, (void*)obj);
|
32
|
+
return Qnil;
|
33
|
+
}
|
34
|
+
|
35
|
+
static VALUE t_get(VALUE self, VALUE obj) {
|
36
|
+
NList *array;
|
37
|
+
VALUE arrayv = rb_iv_get(self, "@n_list");
|
38
|
+
Data_Get_Struct(arrayv, NList, array);
|
39
|
+
return (VALUE)n_list_get(array, FIX2INT(obj));
|
40
|
+
}
|
41
|
+
|
42
|
+
static VALUE t_remove(VALUE self, VALUE start, VALUE finish) {
|
43
|
+
NList *array;
|
44
|
+
VALUE arrayv = rb_iv_get(self, "@n_list");
|
45
|
+
Data_Get_Struct(arrayv, NList, array);
|
46
|
+
int b = FIX2INT(start);
|
47
|
+
int e = FIX2INT(finish);
|
48
|
+
if (n_list_remove(array, b, e)) {
|
49
|
+
int s = n_list_size(array);
|
50
|
+
rb_raise(rb_eArgError, "start = %d, finish = %d with size = %d", b, e, s);
|
51
|
+
}
|
52
|
+
return Qnil;
|
53
|
+
}
|
54
|
+
|
55
|
+
void Init_n_list() {
|
56
|
+
VALUE m_noyes_c = rb_define_module("NoyesC");
|
57
|
+
cArrayList = rb_define_class_under(m_noyes_c, "ArrayList", rb_cObject);
|
58
|
+
rb_define_method(cArrayList, "initialize", t_init, 0);
|
59
|
+
rb_define_method(cArrayList, "size", t_size, 0);
|
60
|
+
rb_define_method(cArrayList, "add", t_add, 1);
|
61
|
+
rb_define_method(cArrayList, "get", t_get, 1);
|
62
|
+
rb_define_method(cArrayList, "remove", t_remove, 2);
|
63
|
+
id_push = rb_intern("push");
|
64
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "noyes.h"
|
3
|
+
#include "rnoyes.h"
|
4
|
+
|
5
|
+
static int id_push;
|
6
|
+
|
7
|
+
VALUE cBentCentMarker;
|
8
|
+
|
9
|
+
static void bent_cent_marker_free(void *p) {
|
10
|
+
free_bent_cent_marker(p);
|
11
|
+
}
|
12
|
+
|
13
|
+
static VALUE t_init(VALUE self) {
|
14
|
+
BentCentMarker *pre = new_bent_cent_marker();
|
15
|
+
VALUE prev = Data_Wrap_Struct(cBentCentMarker, 0, bent_cent_marker_free, pre);
|
16
|
+
rb_iv_set(self, "@bent_cent_marker", prev);
|
17
|
+
return self;
|
18
|
+
}
|
19
|
+
|
20
|
+
static VALUE t_left_shift(VALUE self, VALUE obj) {
|
21
|
+
NMatrix1 *M = v_2_nmatrix1(obj);
|
22
|
+
BentCentMarker *pre;
|
23
|
+
VALUE prev = rb_iv_get(self, "@bent_cent_marker");
|
24
|
+
Data_Get_Struct(prev, BentCentMarker, pre);
|
25
|
+
int res = bent_cent_marker_apply(pre, M);
|
26
|
+
free_nmatrix1(M);
|
27
|
+
return res ? Qtrue : Qfalse;
|
28
|
+
}
|
29
|
+
|
30
|
+
void Init_bent_cent_marker() {
|
31
|
+
VALUE m_noyes_c = rb_define_module("NoyesC");
|
32
|
+
cBentCentMarker = rb_define_class_under(m_noyes_c, "BentCentMarker", rb_cObject);
|
33
|
+
rb_define_method(cBentCentMarker, "initialize", t_init, 0);
|
34
|
+
rb_define_method(cBentCentMarker, "<<", t_left_shift, 1);
|
35
|
+
id_push = rb_intern("push");
|
36
|
+
}
|
@@ -49,6 +49,21 @@ static VALUE t_melcos(VALUE self) {
|
|
49
49
|
return result;
|
50
50
|
}
|
51
51
|
|
52
|
+
static VALUE t_dft(VALUE classmod, VALUE data, VALUE size) {
|
53
|
+
NMatrix1 *M = v_2_nmatrix1(data);
|
54
|
+
NMatrix *R = dft(M->data, M->rows, FIX2INT(size));
|
55
|
+
VALUE result = rb_ary_new2(R->cols);
|
56
|
+
int i;
|
57
|
+
for (i=0;i<R->cols;++i) {
|
58
|
+
VALUE real = rb_float_new(R->data[0][i]);
|
59
|
+
VALUE imag = rb_float_new(R->data[1][i]);
|
60
|
+
rb_ary_store(result, i, rb_complex_new(real, imag));
|
61
|
+
}
|
62
|
+
free_nmatrix1(M);
|
63
|
+
free_nmatrix(R);
|
64
|
+
return result;
|
65
|
+
}
|
66
|
+
|
52
67
|
void Init_dct() {
|
53
68
|
VALUE m_noyes_c = rb_define_module("NoyesC");
|
54
69
|
cDiscreteCosineTransform = rb_define_class_under(m_noyes_c,
|
@@ -56,5 +71,6 @@ void Init_dct() {
|
|
56
71
|
rb_define_method(cDiscreteCosineTransform, "initialize", t_init, -2);
|
57
72
|
rb_define_method(cDiscreteCosineTransform, "<<", t_left_shift, 1);
|
58
73
|
rb_define_method(cDiscreteCosineTransform, "melcos", t_melcos, 0);
|
74
|
+
rb_define_module_function(m_noyes_c, "dft", t_dft, 2);
|
59
75
|
id_push = rb_intern("push");
|
60
76
|
}
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include "n_array_list.h"
|
5
|
+
|
6
|
+
#define NLIST_INITIAL_CAPACITY 10
|
7
|
+
#define NLIST_DELTA_CAPACITY 10
|
8
|
+
#undef TRUE
|
9
|
+
#define TRUE 1
|
10
|
+
#undef FALSE
|
11
|
+
#define FALSE 0
|
12
|
+
|
13
|
+
NList * n_list_new() {
|
14
|
+
NList * self;
|
15
|
+
self = malloc(sizeof(NList));
|
16
|
+
self->capacity = NLIST_INITIAL_CAPACITY;
|
17
|
+
self->data = malloc(sizeof(void*) * self->capacity);
|
18
|
+
self->size = 0;
|
19
|
+
return self;
|
20
|
+
}
|
21
|
+
|
22
|
+
void n_list_free(NList * self) {
|
23
|
+
free(self->data);
|
24
|
+
free(self);
|
25
|
+
}
|
26
|
+
|
27
|
+
int n_list_add(NList * self, void * object) {
|
28
|
+
int old_size = n_list_size(self);
|
29
|
+
int new_capacity;
|
30
|
+
void ** new_data;
|
31
|
+
|
32
|
+
(self->size)++;
|
33
|
+
if (old_size == self->capacity) {
|
34
|
+
new_capacity = self->capacity + NLIST_DELTA_CAPACITY;
|
35
|
+
new_data = malloc(sizeof(void*) * new_capacity);
|
36
|
+
memcpy(new_data, self->data, sizeof(void*) * old_size);
|
37
|
+
free(self->data);
|
38
|
+
(self->data) = new_data;
|
39
|
+
self->capacity = new_capacity;
|
40
|
+
}
|
41
|
+
self->data[old_size] = object;
|
42
|
+
return TRUE;
|
43
|
+
}
|
44
|
+
|
45
|
+
int n_list_remove(NList * self, int start, int finish) {
|
46
|
+
if (start > finish || finish > self->size)
|
47
|
+
return 1;
|
48
|
+
|
49
|
+
memmove(self->data + start, self->data + finish,
|
50
|
+
sizeof(void*) * (self->size - finish));
|
51
|
+
self->size = self->size - (finish - start);
|
52
|
+
if (self->size < self->capacity - 2 * NLIST_DELTA_CAPACITY) {
|
53
|
+
int new_capacity = self->size + NLIST_DELTA_CAPACITY;
|
54
|
+
self->data = realloc(self->data, sizeof(void*) * new_capacity);
|
55
|
+
self->capacity = new_capacity;
|
56
|
+
}
|
57
|
+
return 0;
|
58
|
+
}
|
59
|
+
|
60
|
+
void * n_list_get(const NList * self, const int index) {
|
61
|
+
if (index < 0 || index > self->size)
|
62
|
+
return NULL;
|
63
|
+
return self->data[index];
|
64
|
+
}
|
65
|
+
|
66
|
+
int n_list_is_empty(const NList * self) {
|
67
|
+
return 0 == n_list_size(self);
|
68
|
+
}
|
69
|
+
|
70
|
+
int n_list_size(const NList * self) {
|
71
|
+
return self->size;
|
72
|
+
}
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#ifndef _N_ARRAY_LIST_H_
|
2
|
+
#define _N_ARRAY_LIST_H_
|
3
|
+
|
4
|
+
typedef struct {
|
5
|
+
int capacity;
|
6
|
+
void **data;
|
7
|
+
int size;
|
8
|
+
} NList;
|
9
|
+
|
10
|
+
NList * n_list_new();
|
11
|
+
void n_list_free(NList * self);
|
12
|
+
int n_list_size(const NList * self);
|
13
|
+
int n_list_add(NList * self, void * object);
|
14
|
+
int n_list_remove(NList * self, int start, int finish);
|
15
|
+
void * n_list_get(const NList * self, const int index);
|
16
|
+
int n_list_is_empty(const NList * self);
|
17
|
+
|
18
|
+
#endif
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#include "noyes.h"
|
2
|
+
#include "math.h"
|
3
|
+
#include "stdlib.h"
|
4
|
+
|
5
|
+
BentCentMarker * new_bent_cent_marker() {
|
6
|
+
BentCentMarker *self = malloc(sizeof(BentCentMarker));
|
7
|
+
self->adjustment = 0.003;
|
8
|
+
self->average_number = 1.0;
|
9
|
+
self->background = 100.0;
|
10
|
+
self->level = 0.0;
|
11
|
+
self->min_signal = 0.0;
|
12
|
+
self->threshold = 10.0;
|
13
|
+
return self;
|
14
|
+
}
|
15
|
+
|
16
|
+
void free_bent_cent_marker(BentCentMarker *self) {
|
17
|
+
free(self);
|
18
|
+
}
|
19
|
+
|
20
|
+
double bent_cent_log_rms(BentCentMarker *self, NMatrix1 *pcm) {
|
21
|
+
double sum_of_squares = 0.0;
|
22
|
+
int i;
|
23
|
+
for (i=0;i<pcm->rows;++i) {
|
24
|
+
sum_of_squares += pcm->data[i] * pcm->data[i];
|
25
|
+
}
|
26
|
+
double rms = sqrt(sum_of_squares/pcm->rows);
|
27
|
+
rms = fmax(rms,1.0);
|
28
|
+
return log(rms) * 20;
|
29
|
+
}
|
30
|
+
|
31
|
+
int bent_cent_marker_apply(BentCentMarker *self, NMatrix1 *pcm) {
|
32
|
+
int is_speech = 0;
|
33
|
+
double current = bent_cent_log_rms(self, pcm);
|
34
|
+
if (current >= self->min_signal) {
|
35
|
+
self->level = ((self->level * self->average_number) + current) /
|
36
|
+
(self->average_number + 1);
|
37
|
+
if (current < self->background) {
|
38
|
+
self->background = current;
|
39
|
+
} else {
|
40
|
+
self->background += (current - self->background) * self->adjustment;
|
41
|
+
}
|
42
|
+
if (self->level < self->background) {
|
43
|
+
self->level = self->background;
|
44
|
+
}
|
45
|
+
is_speech = self->level - self->background > self->threshold;
|
46
|
+
}
|
47
|
+
return is_speech;
|
48
|
+
}
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#include "noyes.h"
|
2
|
+
#undef TRUE
|
3
|
+
#define TRUE 1
|
4
|
+
#undef FALSE
|
5
|
+
#define FALSE 0
|
6
|
+
|
7
|
+
SpeechTrimmer * new_speech_trimmer() {
|
8
|
+
SpeechTrimmer *self = malloc(sizeof(SpeechTrimmer));
|
9
|
+
self->leader = 5;
|
10
|
+
self->trailer = 5;
|
11
|
+
self->speech_started = FALSE;
|
12
|
+
self->bcm = new_bent_cent_marker();
|
13
|
+
self->false_count = 0;
|
14
|
+
self->true_count = 0;
|
15
|
+
self->queue = n_list_new();
|
16
|
+
self->eos_reached = FALSE;
|
17
|
+
self->scs = 20;
|
18
|
+
self->ecs = 50;
|
19
|
+
return self;
|
20
|
+
}
|
21
|
+
|
22
|
+
void free_speech_trimmer(SpeechTrimmer *self) {
|
23
|
+
free_bent_cent_marker(self->bcm);
|
24
|
+
n_list_free(self->queue);
|
25
|
+
free(self);
|
26
|
+
}
|
27
|
+
|
28
|
+
void speech_trimmer_enqueue(SpeechTrimmer *self, NMatrix1* pcm) {
|
29
|
+
if (self->eos_reached)
|
30
|
+
return;
|
31
|
+
n_list_add(self->queue, pcm);
|
32
|
+
if (bent_cent_marker_apply(self->bcm, pcm)) {
|
33
|
+
self->false_count = 0;
|
34
|
+
self->true_count += 1;
|
35
|
+
} else {
|
36
|
+
self->false_count += 1;
|
37
|
+
self->true_count = 0;
|
38
|
+
}
|
39
|
+
if (self->speech_started) {
|
40
|
+
if (self->false_count == self->ecs) {
|
41
|
+
self->eos_reached = TRUE;
|
42
|
+
int new_size = n_list_size(self->queue) - self->ecs + self->trailer;
|
43
|
+
n_list_remove(self->queue, new_size, n_list_size(self->queue));
|
44
|
+
}
|
45
|
+
} else if (self->true_count > self->scs) {
|
46
|
+
if (self->leader + self->scs < n_list_size(self->queue)) {
|
47
|
+
int start = n_list_size(self->queue) - self->leader - self->scs - 1;
|
48
|
+
n_list_remove(self->queue, 0, start);
|
49
|
+
}
|
50
|
+
self->speech_started = TRUE;
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
NMatrix1 * speech_trimmer_dequeue(SpeechTrimmer *self) {
|
55
|
+
if (n_list_size(self->queue) == 0)
|
56
|
+
return NULL;
|
57
|
+
if (self->eos_reached || (self->speech_started &&
|
58
|
+
n_list_size(self->queue) > self->ecs)) {
|
59
|
+
NMatrix1 * N = n_list_get(self->queue, 0);
|
60
|
+
n_list_remove(self->queue, 0, 1);
|
61
|
+
return N;
|
62
|
+
}
|
63
|
+
return NULL;
|
64
|
+
}
|
65
|
+
int speech_trimmer_eos(SpeechTrimmer *self) {
|
66
|
+
return self->eos_reached;
|
67
|
+
}
|
data/lib/c_impl/noyes.h
CHANGED
@@ -145,6 +145,42 @@ Fast8kMfcc* new_fast_8k_mfcc();
|
|
145
145
|
void free_fast_8k_mfcc(Fast8kMfcc *self);
|
146
146
|
NMatrix *fast_8k_mfcc_apply(Fast8kMfcc *self, NMatrix1 *data);
|
147
147
|
|
148
|
+
// Silence removal with BentCentMarker and SpeechTrimmer
|
149
|
+
typedef struct {
|
150
|
+
double adjustment;
|
151
|
+
double average_number;
|
152
|
+
double background;
|
153
|
+
double level;
|
154
|
+
double min_signal;
|
155
|
+
double threshold;
|
156
|
+
} BentCentMarker;
|
157
|
+
|
158
|
+
BentCentMarker * new_bent_cent_marker();
|
159
|
+
void free_bent_cent_marker(BentCentMarker *self);
|
160
|
+
double bent_cent_marker_log_rms(BentCentMarker *self, NMatrix1 *data);
|
161
|
+
int bent_cent_marker_apply(BentCentMarker *self, NMatrix1 *data);
|
162
|
+
|
163
|
+
#include "n_array_list.h"
|
164
|
+
|
165
|
+
typedef struct {
|
166
|
+
int leader;
|
167
|
+
int trailer;
|
168
|
+
int speech_started;
|
169
|
+
int false_count;
|
170
|
+
int true_count;
|
171
|
+
int scs;
|
172
|
+
int ecs;
|
173
|
+
BentCentMarker *bcm;
|
174
|
+
NList *queue;
|
175
|
+
int eos_reached;
|
176
|
+
} SpeechTrimmer;
|
177
|
+
|
178
|
+
SpeechTrimmer * new_speech_trimmer();
|
179
|
+
void free_speech_trimmer(SpeechTrimmer *self);
|
180
|
+
void speech_trimmer_enqueue(SpeechTrimmer *self, NMatrix1* pcm);
|
181
|
+
NMatrix1 * speech_trimmer_dequeue(SpeechTrimmer *self);
|
182
|
+
int speech_trimmer_eos(SpeechTrimmer *self);
|
183
|
+
|
148
184
|
#ifdef __cplusplus
|
149
185
|
}
|
150
186
|
#endif
|
data/lib/c_impl/noyes_c.c
CHANGED
data/lib/c_impl/rnoyes.h
CHANGED
@@ -0,0 +1,52 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "noyes.h"
|
3
|
+
#include "rnoyes.h"
|
4
|
+
|
5
|
+
static int id_push;
|
6
|
+
|
7
|
+
VALUE cSpeechTrimmer;
|
8
|
+
|
9
|
+
static void speech_trimmer_free(void *p) {
|
10
|
+
free_speech_trimmer(p);
|
11
|
+
}
|
12
|
+
|
13
|
+
static VALUE t_init(VALUE self) {
|
14
|
+
SpeechTrimmer *st = new_speech_trimmer();
|
15
|
+
VALUE stv = Data_Wrap_Struct(cSpeechTrimmer, 0, speech_trimmer_free, st);
|
16
|
+
rb_iv_set(self, "@speech_trimmer", stv);
|
17
|
+
return self;
|
18
|
+
}
|
19
|
+
|
20
|
+
static VALUE t_enqueue(VALUE self, VALUE obj) {
|
21
|
+
NMatrix1 *M = v_2_nmatrix1(obj);
|
22
|
+
SpeechTrimmer *st;
|
23
|
+
Data_Get_Struct(rb_iv_get(self, "@speech_trimmer"), SpeechTrimmer, st);
|
24
|
+
speech_trimmer_enqueue(st, M);
|
25
|
+
return Qnil;
|
26
|
+
}
|
27
|
+
|
28
|
+
static VALUE t_dequeue(VALUE self) {
|
29
|
+
SpeechTrimmer *st;
|
30
|
+
Data_Get_Struct(rb_iv_get(self, "@speech_trimmer"), SpeechTrimmer, st);
|
31
|
+
NMatrix1 *N =speech_trimmer_dequeue(st);
|
32
|
+
VALUE result = nmatrix1_2_v(N);
|
33
|
+
free_nmatrix1(N);
|
34
|
+
return result;
|
35
|
+
}
|
36
|
+
|
37
|
+
static VALUE t_eos(VALUE self) {
|
38
|
+
SpeechTrimmer *st;
|
39
|
+
VALUE stv = rb_iv_get(self, "@speech_trimmer");
|
40
|
+
Data_Get_Struct(stv, SpeechTrimmer, st);
|
41
|
+
return speech_trimmer_dequeue(st) ? Qtrue : Qfalse;
|
42
|
+
}
|
43
|
+
|
44
|
+
void Init_speech_trimmer() {
|
45
|
+
VALUE m_noyes_c = rb_define_module("NoyesC");
|
46
|
+
cSpeechTrimmer = rb_define_class_under(m_noyes_c, "SpeechTrimmer", rb_cObject);
|
47
|
+
rb_define_method(cSpeechTrimmer, "initialize", t_init, 0);
|
48
|
+
rb_define_method(cSpeechTrimmer, "enqueue", t_enqueue, 1);
|
49
|
+
rb_define_method(cSpeechTrimmer, "dequeue", t_dequeue, 0);
|
50
|
+
rb_define_method(cSpeechTrimmer, "eos?", t_eos, 0);
|
51
|
+
id_push = rb_intern("push");
|
52
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Math
|
2
|
+
def self.max a, b
|
3
|
+
a > b ? a : b
|
4
|
+
end
|
5
|
+
def self.min a, b
|
6
|
+
a < b ? a : b
|
7
|
+
end
|
8
|
+
end
|
9
|
+
# I don't really understand why Ruby 1.9 needs this. It seems that Math gets
|
10
|
+
# redefined to CMath at some point. So calling Math.max will fail in 1.9 unless
|
11
|
+
# I put these functions in CMath too.
|
12
|
+
module CMath
|
13
|
+
def self.max a, b
|
14
|
+
a > b ? a : b
|
15
|
+
end
|
16
|
+
def self.min a, b
|
17
|
+
a < b ? a : b
|
18
|
+
end
|
19
|
+
end
|
data/lib/common.rb
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
module NoyesJava
|
2
|
+
class SpeechTrimmer
|
3
|
+
def initialize
|
4
|
+
@st = Java::talkhouse.SpeechTrimmer.new
|
5
|
+
end
|
6
|
+
def enqueue pcm
|
7
|
+
@st.enqueue pcm.to_java(Java::double)
|
8
|
+
end
|
9
|
+
def dequeue
|
10
|
+
speech = @st.dequeue
|
11
|
+
speech.to_a if speech
|
12
|
+
end
|
13
|
+
def eos?
|
14
|
+
@st.eos
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
data/lib/noyes.rb
CHANGED
data/lib/noyes_java.rb
CHANGED
@@ -0,0 +1,40 @@
|
|
1
|
+
module Noyes
|
2
|
+
# Determines whether a PCM frame is speech or not using Bent
|
3
|
+
# Schmidt-Nielsen's algorithm. Basically, it's an energy-based detector
|
4
|
+
# where the background noise level is constantly estimated.
|
5
|
+
#
|
6
|
+
# The pcm data should be in 10 millisecond (centisecond) chunks. For example,
|
7
|
+
# at 8000 Hz there should be 80 frames of pcm.
|
8
|
+
class BentCentMarker
|
9
|
+
def initialize
|
10
|
+
@adjustment = 0.003
|
11
|
+
@average_number = 1.0
|
12
|
+
@background = 100.0
|
13
|
+
@level = 0.0
|
14
|
+
@min_signal = 0.0
|
15
|
+
@threshold = 10.0
|
16
|
+
end
|
17
|
+
def logrms pcm
|
18
|
+
sum_of_squares = 0.0
|
19
|
+
pcm.each {|sample| sum_of_squares += sample * sample}
|
20
|
+
rms = Math.sqrt sum_of_squares / pcm.size;
|
21
|
+
rms = Math.max rms, 1
|
22
|
+
Math.log(rms) * 20
|
23
|
+
end
|
24
|
+
def << pcm
|
25
|
+
is_speech = false
|
26
|
+
current = logrms pcm
|
27
|
+
if current >= @min_signal
|
28
|
+
@level = ((@level * @average_number) + current) / (@average_number + 1)
|
29
|
+
if current < @background
|
30
|
+
@background = current
|
31
|
+
else
|
32
|
+
@background += (current - @background) * @adjustment
|
33
|
+
end
|
34
|
+
@level = @background if (@level < @background)
|
35
|
+
is_speech = @level - @background > @threshold
|
36
|
+
end
|
37
|
+
is_speech
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Noyes
|
2
|
+
# SpeechTrimmer trims non-speech from both ends of an audio stream. Each time
|
3
|
+
# you enqueue audio into it you should dequeue audio out of it until dequeue
|
4
|
+
# returns nil. Then check for eos. If eos is true you are done.
|
5
|
+
# SpeechTrimmer is designed to work efficiently with live audio.
|
6
|
+
class SpeechTrimmer
|
7
|
+
def initialize
|
8
|
+
@leader = 5 # Cents of leading silence to retain.
|
9
|
+
@trailer = 5 # Cents of trailing silence to retain.
|
10
|
+
@speech_started = false
|
11
|
+
@cent_marker = BentCentMarker.new
|
12
|
+
@false_count=0
|
13
|
+
@true_count=0
|
14
|
+
@queue = []
|
15
|
+
@eos_reached = false
|
16
|
+
@scs = 20 # Centiseconds of speech before detection of utterance.
|
17
|
+
@ecs = 50 # Centiseconds of silence before end detection.
|
18
|
+
end
|
19
|
+
|
20
|
+
def enqueue pcm
|
21
|
+
return if @eos_reached
|
22
|
+
@queue << pcm
|
23
|
+
if @cent_marker << pcm
|
24
|
+
@false_count = 0
|
25
|
+
@true_count += 1
|
26
|
+
else
|
27
|
+
@false_count += 1
|
28
|
+
@true_count = 0
|
29
|
+
end
|
30
|
+
if @speech_started
|
31
|
+
if @false_count == @ecs
|
32
|
+
@eos_reached = true
|
33
|
+
# only keep trailer number of cents once eos is detected.
|
34
|
+
@queue = @queue[0, @queue.size - @ecs + @trailer]
|
35
|
+
end
|
36
|
+
elsif @true_count > @scs
|
37
|
+
# Discard most beginning silence, keeping just a tad.
|
38
|
+
if @leader + @scs < @queue.size
|
39
|
+
start = @queue.size - @leader - 1 - @scs
|
40
|
+
@queue = @queue[start,@queue.size - start]
|
41
|
+
end
|
42
|
+
@speech_started = true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
def dequeue
|
46
|
+
if @eos_reached || (@speech_started && @queue.size > @ecs)
|
47
|
+
@queue.shift
|
48
|
+
end
|
49
|
+
end
|
50
|
+
def eos?
|
51
|
+
@eos_reached
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/ship/noyes.jar
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: noyes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joe Woelfel
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-06-22 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -37,6 +37,8 @@ extra_rdoc_files:
|
|
37
37
|
- README
|
38
38
|
files:
|
39
39
|
- VERSION
|
40
|
+
- lib/c_impl/array_list.c
|
41
|
+
- lib/c_impl/bent_cent_marker.c
|
40
42
|
- lib/c_impl/discrete_cosine_transform.c
|
41
43
|
- lib/c_impl/extconf.rb
|
42
44
|
- lib/c_impl/fast_8k_mfcc.c
|
@@ -44,6 +46,9 @@ files:
|
|
44
46
|
- lib/c_impl/live_cmn.c
|
45
47
|
- lib/c_impl/log_compressor.c
|
46
48
|
- lib/c_impl/mel_filter.c
|
49
|
+
- lib/c_impl/n_array_list.c
|
50
|
+
- lib/c_impl/n_array_list.h
|
51
|
+
- lib/c_impl/n_bent_cent_marker.c
|
47
52
|
- lib/c_impl/n_dft.c
|
48
53
|
- lib/c_impl/n_discrete_cosine_transform.c
|
49
54
|
- lib/c_impl/n_fast_8k_mfcc.c
|
@@ -55,12 +60,14 @@ files:
|
|
55
60
|
- lib/c_impl/n_power_spec.c
|
56
61
|
- lib/c_impl/n_preemphasis.c
|
57
62
|
- lib/c_impl/n_segmenter.c
|
63
|
+
- lib/c_impl/n_speech_trimmer.c
|
58
64
|
- lib/c_impl/noyes.h
|
59
65
|
- lib/c_impl/noyes_c.c
|
60
66
|
- lib/c_impl/power_spectrum.c
|
61
67
|
- lib/c_impl/preemphasis.c
|
62
68
|
- lib/c_impl/rnoyes.h
|
63
69
|
- lib/c_impl/segmenter.c
|
70
|
+
- lib/c_impl/speech_trimmer.c
|
64
71
|
- lib/common.rb
|
65
72
|
- lib/common/file2pcm.rb
|
66
73
|
- lib/common/mock_noyes_server.rb
|
@@ -68,8 +75,10 @@ files:
|
|
68
75
|
- lib/common/noyes_math.rb
|
69
76
|
- lib/common/noyes_protocol.rb
|
70
77
|
- lib/common/parallel_filter.rb
|
78
|
+
- lib/common/ruby_ext.rb
|
71
79
|
- lib/common/send_incrementally.rb
|
72
80
|
- lib/common/serial_filter.rb
|
81
|
+
- lib/java_impl/bent_cent_marker.rb
|
73
82
|
- lib/java_impl/dct.rb
|
74
83
|
- lib/java_impl/delta.rb
|
75
84
|
- lib/java_impl/discrete_fourier_transform.rb
|
@@ -82,9 +91,11 @@ files:
|
|
82
91
|
- lib/java_impl/power_spec.rb
|
83
92
|
- lib/java_impl/preemphasis.rb
|
84
93
|
- lib/java_impl/segment.rb
|
94
|
+
- lib/java_impl/speech_trimmer.rb
|
85
95
|
- lib/noyes.rb
|
86
96
|
- lib/noyes_c.rb
|
87
97
|
- lib/noyes_java.rb
|
98
|
+
- lib/ruby_impl/bent_cent_marker.rb
|
88
99
|
- lib/ruby_impl/dct.rb
|
89
100
|
- lib/ruby_impl/delta.rb
|
90
101
|
- lib/ruby_impl/discrete_fourier_transform.rb
|
@@ -96,6 +107,7 @@ files:
|
|
96
107
|
- lib/ruby_impl/power_spec.rb
|
97
108
|
- lib/ruby_impl/preemphasis.rb
|
98
109
|
- lib/ruby_impl/segment.rb
|
110
|
+
- lib/ruby_impl/speech_trimmer.rb
|
99
111
|
- ship/noyes.jar
|
100
112
|
- COPYING
|
101
113
|
- FAQ
|