noyes 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ #include "noyes.h"
2
+ #include "rnoyes.h"
3
+
4
+ // Wrappers for matrix class
5
+ // Convert a Ruby array of arrays into a newly allocated NMatrix.
6
+ //
7
+ // value must be an Array whose elements are all Arrays. The length of the
8
+ // first row sets the column count; later rows must have at least that many
9
+ // entries (extra entries are ignored). Returns NULL for an empty outer array.
10
+ // Raises TypeError when value or a row is not an Array, or a row is too short.
11
+ // The caller owns the returned matrix (presumably released with
12
+ // free_nmatrix() -- confirm against noyes.h).
13
+ NMatrix * v_2_nmatrix(VALUE value) {
14
+   // RARRAY_LEN on a non-Array VALUE is undefined, so reject those up front.
15
+   Check_Type(value, T_ARRAY);
16
+   int rows = (int)RARRAY_LEN(value);
17
+   if (rows <= 0) {
18
+     return NULL;
19
+   }
20
+   int cols = 0;
21
+   int i, j;
22
+   // Check the shape of every row before allocating, so that raising here
23
+   // cannot leak the matrix.
24
+   for (i = 0; i < rows; ++i) {
25
+     VALUE row = rb_ary_entry(value, i);
26
+     if (TYPE(row) != T_ARRAY) {
27
+       rb_raise(rb_eTypeError, "Matrix one dimensional instead of two");
28
+     }
29
+     if (i == 0) {
30
+       cols = (int)RARRAY_LEN(row);
31
+     } else if (RARRAY_LEN(row) < cols) {
32
+       rb_raise(rb_eTypeError, "Matrix row shorter than first row");
33
+     }
34
+   }
35
+   NMatrix *M = new_nmatrix(rows, cols);
36
+   for (i = 0; i < rows; ++i) {
37
+     VALUE row = rb_ary_entry(value, i);
38
+     for (j = 0; j < cols; ++j) {
39
+       // NOTE(review): NUM2DBL raises on non-numeric entries; M is not
40
+       // released on that path.
41
+       M->data[i][j] = NUM2DBL(rb_ary_entry(row, j));
42
+     }
43
+   }
44
+   return M;
45
+ }
27
+
28
+ // Build a Ruby array of arrays of Floats from M; a NULL matrix yields nil.
29
+ VALUE nmatrix_2_v(NMatrix *M) {
30
+   if (!M) {
31
+     return Qnil;
32
+   }
33
+   VALUE outer = rb_ary_new2(M->rows);
34
+   int r, c;
35
+   for (r = 0; r < M->rows; ++r) {
36
+     // Attach the row to the outer array first, then fill it.
37
+     VALUE row = rb_ary_new2(M->cols);
38
+     rb_ary_store(outer, r, row);
39
+     for (c = 0; c < M->cols; ++c) {
40
+       rb_ary_store(row, c, rb_float_new(M->data[r][c]));
41
+     }
42
+   }
43
+   return outer;
44
+ }
44
+
45
+ // Convert a flat Ruby array of numbers into a newly allocated NMatrix1.
46
+ //
47
+ // Returns NULL for an empty array. Raises TypeError when value is not an
48
+ // Array or when its first element is itself an Array. The t_left_shift
49
+ // wrappers that call this release the result with free_nmatrix1().
50
+ NMatrix1 * v_2_nmatrix1(VALUE value) {
51
+   // RARRAY_LEN on a non-Array VALUE is undefined, so reject those up front.
52
+   Check_Type(value, T_ARRAY);
53
+   int rows = (int)RARRAY_LEN(value);
54
+   if (rows <= 0) {
55
+     return NULL;
56
+   }
57
+   // Only the first element is inspected to detect a nested (2-D) array.
58
+   if (!NIL_P(rb_check_array_type(rb_ary_entry(value, 0)))) {
59
+     rb_raise(rb_eTypeError, "Matrix two dimensional instead of one");
60
+   }
61
+   NMatrix1 *M = new_nmatrix1(rows);
62
+   int i;
63
+   for (i = 0; i < rows; ++i) {
64
+     M->data[i] = NUM2DBL(rb_ary_entry(value, i));
65
+   }
66
+   return M;
67
+ }
63
+
64
+ // Build a flat Ruby array of Floats from M; a NULL matrix yields nil.
65
+ VALUE nmatrix1_2_v(NMatrix1 *M) {
66
+   if (!M) {
67
+     return Qnil;
68
+   }
69
+   VALUE out = rb_ary_new2(M->rows);
70
+   int idx;
71
+   for (idx = 0; idx < M->rows; ++idx) {
72
+     VALUE sample = rb_float_new(M->data[idx]);
73
+     rb_ary_store(out, idx, sample);
74
+   }
75
+   return out;
76
+ }
76
+
77
+ // Initialize Ruby API wrappers.
78
+ void Init_noyes_c() { // Extension entry point (Init_<name> convention): registers every wrapped class.
79
+ Init_segmenter(); // each Init_* call below defines one class; they are declared in rnoyes.h
80
+ Init_preemphasis();
81
+ Init_hamming_window();
82
+ Init_power_spectrum();
83
+ Init_mel_filter();
84
+ Init_log_compressor();
85
+ Init_live_cmn();
86
+ Init_fast_8k_mfcc();
87
+ Init_dct();
88
+ }
@@ -0,0 +1,42 @@
1
+ #include "ruby.h"
2
+ #include "noyes.h"
3
+ #include "rnoyes.h"
4
+
5
+ static int id_push;
6
+
7
+ VALUE cPowerSpectrum;
8
+
9
+ static void power_spectrum_free(void *p) { // Free callback handed to Data_Wrap_Struct in t_init.
10
+ free_power_spectrum(p); // pass the wrapped pointer on to free_power_spectrum
11
+ }
12
+
13
+ // Ruby-visible initialize. The optional first argument is the nfft value
14
+ // passed to new_power_spectrum (default 256). The method is registered with
15
+ // arity -2, so args arrives as one Ruby Array holding all arguments.
16
+ static VALUE t_init(VALUE self, VALUE args) {
17
+   int nfft = 256;
18
+   if (RARRAY_LEN(args) > 0) {
19
+     nfft = NUM2INT(rb_ary_entry(args, 0));
20
+   }
21
+   // Wrap the native object with power_spectrum_free as its free callback
22
+   // and store the wrapper in @ps, where t_left_shift looks it up.
23
+   VALUE wrapped = Data_Wrap_Struct(cPowerSpectrum, 0, power_spectrum_free,
24
+                                    new_power_spectrum(nfft));
25
+   rb_iv_set(self, "@ps", wrapped);
26
+   return self;
27
+ }
24
+
25
+ // Ruby-visible <<. Converts obj (an array of arrays) to an NMatrix, runs it
26
+ // through the PowerSpectrum stored in @ps, and returns the result as a Ruby
27
+ // array of arrays.
28
+ static VALUE t_left_shift(VALUE self, VALUE obj) {
29
+   NMatrix *M = v_2_nmatrix(obj);
30
+   PowerSpectrum *ps;
31
+   VALUE psv = rb_iv_get(self, "@ps");
32
+   Data_Get_Struct(psv, PowerSpectrum, ps);
33
+   NMatrix *N = power_spectrum_apply(ps, M);
34
+   VALUE result = nmatrix_2_v(N);
35
+   free_nmatrix(N);
36
+   // The converted input is a temporary copy as well; release it here, as
37
+   // the preemphasis and segmenter wrappers do, so each call does not leak it.
38
+   free_nmatrix(M);
39
+   return result;
40
+ }
35
+
36
+ void Init_power_spectrum() { // Define NoyesC::PowerSpectrumFilter and its methods.
37
+ VALUE m_noyes_c = rb_define_module("NoyesC");
38
+ cPowerSpectrum = rb_define_class_under(m_noyes_c, "PowerSpectrumFilter", rb_cObject);
39
+ rb_define_method(cPowerSpectrum, "initialize", t_init, -2); // -2: all arguments arrive as one Ruby Array
40
+ rb_define_method(cPowerSpectrum, "<<", t_left_shift, 1);
41
+ id_push = rb_intern("push"); // NOTE(review): id_push is not read anywhere in the visible code
42
+ }
@@ -0,0 +1,43 @@
1
+ #include "ruby.h"
2
+ #include "noyes.h"
3
+ #include "rnoyes.h"
4
+
5
+ static int id_push;
6
+
7
+ VALUE cPreemphasizer;
8
+
9
+ static void preemphasizer_free(void *p) { // Free callback handed to Data_Wrap_Struct in t_init.
10
+ free_preemphasizer(p); // pass the wrapped pointer on to free_preemphasizer
11
+ }
12
+
13
+ // Ruby-visible initialize. The optional first argument is the factor passed
14
+ // to new_preemphasizer (default 0.97). The method is registered with arity
15
+ // -2, so args arrives as one Ruby Array holding all arguments.
16
+ static VALUE t_init(VALUE self, VALUE args) {
17
+   double factor = 0.97;
18
+   if (RARRAY_LEN(args) > 0) {
19
+     factor = NUM2DBL(rb_ary_entry(args, 0));
20
+   }
21
+   // Wrap the native object with preemphasizer_free as its free callback and
22
+   // store the wrapper in @preemphasizer, where t_left_shift looks it up.
23
+   VALUE wrapped = Data_Wrap_Struct(cPreemphasizer, 0, preemphasizer_free,
24
+                                    new_preemphasizer(factor));
25
+   rb_iv_set(self, "@preemphasizer", wrapped);
26
+   return self;
27
+ }
24
+
25
+ // Ruby-visible <<. Converts obj (a flat array) to an NMatrix1, applies the
26
+ // Preemphasizer stored in @preemphasizer, and returns the output as a Ruby
27
+ // array.
28
+ static VALUE t_left_shift(VALUE self, VALUE obj) {
29
+   NMatrix1 *input = v_2_nmatrix1(obj);
30
+   VALUE wrapper = rb_iv_get(self, "@preemphasizer");
31
+   Preemphasizer *filter;
32
+   Data_Get_Struct(wrapper, Preemphasizer, filter);
33
+   NMatrix1 *output = preemphasizer_apply(filter, input);
34
+   VALUE result = nmatrix1_2_v(output);
35
+   // Both native buffers are temporaries once the Ruby array has been built.
36
+   free_nmatrix1(output);
37
+   free_nmatrix1(input);
38
+   return result;
39
+ }
36
+
37
+ void Init_preemphasis() { // Define NoyesC::Preemphasizer and its methods.
38
+ VALUE m_noyes_c = rb_define_module("NoyesC");
39
+ cPreemphasizer = rb_define_class_under(m_noyes_c, "Preemphasizer", rb_cObject);
40
+ rb_define_method(cPreemphasizer, "initialize", t_init, -2); // -2: all arguments arrive as one Ruby Array
41
+ rb_define_method(cPreemphasizer, "<<", t_left_shift, 1);
42
+ id_push = rb_intern("push"); // NOTE(review): id_push is not read anywhere in the visible code
43
+ }
@@ -0,0 +1,17 @@
1
+ // Wrapper stuff. Only ruby related stuff below here.
2
+ #include "ruby.h"
3
+ void Init_preemphasis(); // Per-class registration functions, called from Init_noyes_c.
4
+ void Init_segmenter();
5
+ void Init_hamming_window();
6
+ void Init_power_spectrum();
7
+ void Init_mel_filter();
8
+ void Init_log_compressor();
9
+ void Init_live_cmn();
10
+ void Init_fast_8k_mfcc();
11
+ void Init_dct();
12
+
13
+ VALUE nmatrix_2_v(NMatrix *d); // NMatrix -> Ruby array of arrays (nil for NULL)
14
+ NMatrix * v_2_nmatrix(VALUE value); // Ruby array of arrays -> new NMatrix (NULL when empty)
15
+
16
+ VALUE nmatrix1_2_v(NMatrix1 *d); // NMatrix1 -> flat Ruby array (nil for NULL)
17
+ NMatrix1 * v_2_nmatrix1(VALUE value); // flat Ruby array -> new NMatrix1 (NULL when empty)
@@ -0,0 +1,48 @@
1
+ #include "ruby.h"
2
+ #include "noyes.h"
3
+ #include "rnoyes.h"
4
+
5
+ static int id_push;
6
+
7
+ VALUE cSegmenter;
8
+
9
+ static void segmenter_free(void *p) { // Free callback handed to Data_Wrap_Struct in t_init.
10
+ free_segmenter(p); // pass the wrapped pointer on to free_segmenter
11
+ }
12
+ // Ruby-visible initialize. Optional arguments, in order: winsz (default 205)
13
+ // and winshift (default 80), both passed to new_segmenter. The method is
14
+ // registered with arity -2, so args arrives as one Ruby Array.
15
+ static VALUE t_init(VALUE self, VALUE args) {
16
+   int len = RARRAY_LEN(args);
17
+   int winsz = (len > 0) ? NUM2INT(rb_ary_entry(args, 0)) : 205;
18
+   int winshift = (len > 1) ? NUM2INT(rb_ary_entry(args, 1)) : 80;
19
+
20
+   // Wrap the native object with segmenter_free as its free callback and
21
+   // store the wrapper in @segmenter, where t_left_shift looks it up.
22
+   VALUE wrapped = Data_Wrap_Struct(cSegmenter, 0, segmenter_free,
23
+                                    new_segmenter(winsz, winshift));
24
+   rb_iv_set(self, "@segmenter", wrapped);
25
+
26
+   return self;
27
+ }
29
+
30
+ // Ruby-visible <<. Converts obj (a flat array) to an NMatrix1, applies the
31
+ // Segmenter stored in @segmenter, and returns the resulting NMatrix as a
32
+ // Ruby array of arrays.
33
+ static VALUE t_left_shift(VALUE self, VALUE obj) {
34
+   NMatrix1 *input = v_2_nmatrix1(obj);
35
+   VALUE wrapper = rb_iv_get(self, "@segmenter");
36
+   Segmenter *seg;
37
+   Data_Get_Struct(wrapper, Segmenter, seg);
38
+   NMatrix *frames = segmenter_apply(seg, input);
39
+   VALUE result = nmatrix_2_v(frames);
40
+   // Both native buffers are temporaries once the Ruby array has been built.
41
+   free_nmatrix(frames);
42
+   free_nmatrix1(input);
43
+   return result;
44
+ }
41
+
42
+ void Init_segmenter() { // Define NoyesC::Segmenter and its methods.
43
+ VALUE m_noyes_c = rb_define_module("NoyesC");
44
+ cSegmenter = rb_define_class_under(m_noyes_c, "Segmenter", rb_cObject);
45
+ rb_define_method(cSegmenter, "initialize", t_init, -2); // -2: all arguments arrive as one Ruby Array
46
+ rb_define_method(cSegmenter, "<<", t_left_shift, 1);
47
+ id_push = rb_intern("push"); // NOTE(review): id_push is not read anywhere in the visible code
48
+ }
@@ -0,0 +1,9 @@
1
+ def file2pcm file, bits, freq
2
+ raw = `sox #{file} -s -B -r #{freq} -b #{bits} -t raw -`
3
+ length = bits.to_i # bits
4
+ max = 2**length-1
5
+ mid = 2**(length-1)
6
+ to_signed = proc {|n| (n>=mid) ? -((n ^ max) + 1) : n}
7
+ unpacked = raw.unpack 'n*'
8
+ unpacked.map{|d| to_signed[d].to_f}
9
+ end
@@ -0,0 +1,102 @@
1
+ require 'socket'
2
+ require 'noyes_protocol'
3
+ require 'fcntl'
4
+ class MockNoyesServer
5
+ class Session
6
+ attr_accessor :file
7
+ attr_accessor :data
8
+ attr_accessor :magic
9
+
10
+ def initialize file
11
+ @file = file
12
+ @data = ''
13
+ @magic = false
14
+ end
15
+ end
16
+ def initialize options
17
+ @server_socket = TCPServer.new('', options.port)
18
+ @server_socket.setsockopt Socket::SOL_SOCKET, Socket::SO_REUSEADDR, 1
19
+ @descriptors = [@server_socket]
20
+ @sessions = {}
21
+ @file_counter = 0
22
+ @verbose = true
23
+ end
24
+ def run
25
+ while true
26
+ res = select(@descriptors, nil, nil, nil)
27
+ if res
28
+ res[0].each do |sock|
29
+ if sock == @server_socket
30
+ accept_new_connection
31
+ else
32
+ process_available_data sock
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+
39
+ def accept_new_connection
40
+ puts "Accepting new connection."
41
+ newsock = @server_socket.accept
42
+ @descriptors.push newsock
43
+ session = Session.new open("session_#{@file_counter+=1}.raw", 'w')
44
+ @sessions[newsock] = session
45
+ end
46
+
47
+ def process_available_data sock
48
+ msg, from = sock.recvfrom 1024
49
+ return if msg.size == 0
50
+ session = @sessions[sock]
51
+ session.data << msg
52
+
53
+ if !session.magic
54
+ if session.data =~ /^#{TMAGIC}/
55
+ session.magic = true
56
+ session.data.slice! 0, TMAGIC.size
57
+ end
58
+ end
59
+
60
+ id = session.data.slice(0,4)
61
+ id = session.data.slice!(0,4) if id == TSTART
62
+
63
+ # We just don't really do anything with the cepstra in the mock server.
64
+ cepstra = []
65
+ while id == TCEPSTRA && session.data.size >=8
66
+ cep_count = 13 * session.data.slice(4,4).unpack('N')[0]
67
+ puts "cep_count = #{cep_count}" if @verbose
68
+ break unless cep_count * 4 + TCEPSTRA.size + 4 <= session.data.size
69
+ session.data.slice!(0,8)
70
+ cepstra.push session.data.slice!(0,cep_count * 4).unpack('g*')
71
+ puts "cepval = #{cepstra.last}" if @verbose
72
+ id = session.data.slice(0,4)
73
+ end
74
+ while (id == TA16_44 || id == TA16_16) && session.data.size >=8
75
+ count = session.data.slice(4,4).unpack('N')[0]
76
+ break unless count * 2 + TA16_44.size + 4 <= session.data.size
77
+ print '.'
78
+ puts "count = #{count}"
79
+ session.data.slice!(0,8)
80
+ audio = session.data.slice!(0,count*2).unpack('n*')
81
+ session.file.write audio.pack 'n*'
82
+ id = session.data.slice(0,4)
83
+ end
84
+ puts "id = #{id.unpack 'N'}" if @verbose
85
+ if id == TEND
86
+ puts "Connection closed."
87
+ session.file.flush
88
+ session.file.close
89
+ session.data.slice!(0,4)
90
+ sock.puts 'new england patriots'
91
+ close_socket sock
92
+ end
93
+ session.data.slice!(0,4) if id == TBYE
94
+ rescue IOError => e
95
+ puts "Connection IOError"
96
+ end
97
+ def close_socket sock
98
+ @descriptors.delete sock
99
+ @sessions.delete sock
100
+ sock.close
101
+ end
102
+ end
@@ -0,0 +1,10 @@
1
+ TMAGIC = '1.0 talkhouse' # Written once at the start of a session, before any tagged message.
2
+
3
+ # The following constants are packed as 4 byte big-endian integers.
4
+ TSTART = [0].pack('N') # start of microphone input
5
+ #TAUDIO = [1].pack('N')
6
+ TEND = [2].pack('N') # microphone is off
7
+ TBYE = [3].pack('N') # session is finished
8
+ TCEPSTRA = [4].pack('N') # followed by a 4-byte frame count, then 13 floats per frame
9
+ TA16_16 = [5].pack('N') # followed by a 4-byte sample count, then 16-bit samples; presumably 16 kHz -- confirm
10
+ TA16_44 = [6].pack('N') # same framing as TA16_16; presumably 44.1 kHz -- confirm
@@ -1,33 +1,32 @@
1
1
  require 'noyes'
2
+ require 'common/file2pcm'
2
3
  include Noyes
3
4
 
4
- TMAGIC = '1.0 talkhouse'
5
- TSTART = [0].pack('N')
6
- TAUDIO = [1].pack('N')
7
- TEND = [2].pack('N')
8
- TDONE = [3].pack('N')
9
- TCEPSTRA = [4].pack('N')
5
+ # The following flags are in network byte order (big endian) and are 4 bytes
6
+ # long.
7
+ #
8
+ # The following is pseudocode for transmitting audio:
9
+ # send TMAGIC # Magic number
10
+ # send TSTART # Start of microphone
11
+ # while more audio
12
+ # send TAUDIO_16b_8k
13
+ # send length of array (in values, not bytes)
14
+ # end while
15
+ # send TEND # microphone is off
16
+ # send TBYE # session is finished
10
17
 
11
18
  # Use sox to convert a file of almost any common type into PCM.
12
- def file2pcm file
13
- raw = `sox #{file} -s -B -r 8k -b 16 -t raw -`
14
- length = 16 # bits
15
- max = 2**length-1
16
- mid = 2**(length-1)
17
- to_signed = proc {|n| (n>=mid) ? -((n ^ max) + 1) : n}
18
- unpacked = raw.unpack 'n*'
19
- unpacked.map{|d| to_signed[d].to_f}
20
- end
21
-
19
+ # Not sure this works for anything beside 16 bits.
22
20
  # Takes a file and two IO-like objects.
23
- def send_incremental_features file, to_server, from_server
21
+ def send_incremental_features file, to_server, from_server, bits, freq
22
+ stats = {}
24
23
  nfilt = 32
25
24
  min_freq = 200
26
25
  max_freq = 3700
27
- nfft = 256
28
- freq = 8000
29
- shift = 80
30
- frame_size = 205
26
+ freq_adjustment = freq.to_i/8000
27
+ nfft = 256 * freq_adjustment
28
+ shift = 80 * freq_adjustment
29
+ frame_size = 205 * freq_adjustment
31
30
  preemphasizer = Preemphasizer.new 0.97
32
31
  segmenter = Segmenter.new frame_size, shift
33
32
  hamming_windower = HammingWindow.new frame_size
@@ -36,10 +35,13 @@ def send_incremental_features file, to_server, from_server
36
35
  compressor = LogCompressor.new
37
36
  discrete_cosine_transform = DCT.new 13, nfilt
38
37
  live_cmn = LiveCMN.new
39
- pcm = file2pcm file
38
+ pcm = file2pcm file, bits, freq
39
+ stats[:audio_length] = pcm.size/freq.to_f
40
40
  to_server.write TMAGIC
41
41
  to_server.write TSTART
42
+ stats[:process_time] = 0
42
43
  pcm.each_slice 1230 do |data|
44
+ process_time_start = Time.new
43
45
  data >>= preemphasizer
44
46
  data >>= segmenter
45
47
  next unless data
@@ -49,6 +51,7 @@ def send_incremental_features file, to_server, from_server
49
51
  data >>= compressor
50
52
  data >>= discrete_cosine_transform
51
53
  data >>= live_cmn
54
+ stats[:process_time] += Time.new - process_time_start
52
55
  to_server.write TCEPSTRA
53
56
  to_server.write [data.size].pack('N')
54
57
  print '.'
@@ -56,7 +59,29 @@ def send_incremental_features file, to_server, from_server
56
59
  to_server.flush
57
60
  end
58
61
  to_server.write TEND
59
- to_server.write TDONE
62
+ to_server.write TBYE
63
+ to_server.flush
64
+ latency_start = Time.new
65
+ stats[:transcript] = from_server.read
66
+ stats[:latency] = Time.new - latency_start
67
+ return stats
68
+ end
69
+
70
+ def send_incremental_pcm file, to_server, from_server, depth, rate # Stream sox-converted PCM from file to the server; returns from_server.read.
71
+ raw = `sox #{file} -s -B -r #{rate} -b #{depth} -t raw -` # NOTE(review): file, rate and depth reach the shell unescaped
72
+ to_server.write TMAGIC
73
+ to_server.write TSTART
74
+ chunk = raw.slice! 0, 1024 # consume the sox output 1024 bytes at a time
75
+ while chunk.size > 0
76
+ to_server.write TA16_16 # NOTE(review): the tag is TA16_16 whatever rate was requested
77
+ to_server.write [chunk.size/2].pack('N') # count in samples, taking 2 bytes per sample
78
+ to_server.write chunk
79
+ print '.'
80
+ to_server.flush
81
+ chunk = raw.slice! 0, 1024
82
+ end
83
+ to_server.write TEND
84
+ to_server.write TBYE
60
85
  to_server.flush
61
86
  from_server.read
62
87
  end
data/lib/common.rb CHANGED
@@ -2,4 +2,5 @@ require 'common/serial_filter'
2
2
  require 'common/parallel_filter'
3
3
  require 'common/noyes_dsl'
4
4
  require 'common/noyes_math'
5
+ require 'common/noyes_protocol'
5
6
  require 'common/send_incrementally'
data/lib/noyes_c.rb ADDED
@@ -0,0 +1,2 @@
1
+ require "common"
2
+ require "c_impl/noyes_c"
data/ship/noyes.jar CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: noyes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Woelfel
@@ -9,26 +9,64 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-02-25 00:00:00 -05:00
12
+ date: 2010-05-28 00:00:00 -04:00
13
13
  default_executable:
14
- dependencies: []
15
-
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: trollop
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.0.0
24
+ version:
16
25
  description: Currently sufficient to create features for speech recognition
17
26
  email: joe@talkhouse.com
18
27
  executables:
28
+ - mock_noyes_server
19
29
  - noyes_dump44k
20
30
  - noyes_dump8k
21
31
  - nrec
22
- extensions: []
23
-
32
+ extensions:
33
+ - lib/c_impl/extconf.rb
24
34
  extra_rdoc_files:
25
35
  - COPYING
26
36
  - FAQ
27
37
  - README
28
38
  files:
39
+ - VERSION
40
+ - lib/c_impl/discrete_cosine_transform.c
41
+ - lib/c_impl/extconf.rb
42
+ - lib/c_impl/fast_8k_mfcc.c
43
+ - lib/c_impl/hamming_window.c
44
+ - lib/c_impl/live_cmn.c
45
+ - lib/c_impl/log_compressor.c
46
+ - lib/c_impl/mel_filter.c
47
+ - lib/c_impl/n_dft.c
48
+ - lib/c_impl/n_discrete_cosine_transform.c
49
+ - lib/c_impl/n_fast_8k_mfcc.c
50
+ - lib/c_impl/n_hamming_window.c
51
+ - lib/c_impl/n_live_cmn.c
52
+ - lib/c_impl/n_log_compressor.c
53
+ - lib/c_impl/n_matrix.c
54
+ - lib/c_impl/n_mel_filter.c
55
+ - lib/c_impl/n_power_spec.c
56
+ - lib/c_impl/n_preemphasis.c
57
+ - lib/c_impl/n_segmenter.c
58
+ - lib/c_impl/noyes.h
59
+ - lib/c_impl/noyes_c.c
60
+ - lib/c_impl/power_spectrum.c
61
+ - lib/c_impl/preemphasis.c
62
+ - lib/c_impl/rnoyes.h
63
+ - lib/c_impl/segmenter.c
29
64
  - lib/common.rb
65
+ - lib/common/file2pcm.rb
66
+ - lib/common/mock_noyes_server.rb
30
67
  - lib/common/noyes_dsl.rb
31
68
  - lib/common/noyes_math.rb
69
+ - lib/common/noyes_protocol.rb
32
70
  - lib/common/parallel_filter.rb
33
71
  - lib/common/send_incrementally.rb
34
72
  - lib/common/serial_filter.rb
@@ -45,6 +83,7 @@ files:
45
83
  - lib/java_impl/preemphasis.rb
46
84
  - lib/java_impl/segment.rb
47
85
  - lib/noyes.rb
86
+ - lib/noyes_c.rb
48
87
  - lib/noyes_java.rb
49
88
  - lib/ruby_impl/dct.rb
50
89
  - lib/ruby_impl/delta.rb