noyes 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.9.0
1
+ 0.9.2
@@ -38,3 +38,35 @@ void free_nmatrix1(NMatrix1 *M) {
38
38
  free(M);
39
39
  }
40
40
  }
41
+
42
+ // Converts a square matrix to a list of one dimensional matrices.
43
+ // Simultaneously frees the original square matrix.
44
+ NMatrix1 ** nmatrix_2_nmatrix1s(NMatrix *M) {
45
+ NMatrix1 **single = malloc(sizeof(NMatrix1*) * M->rows);
46
+ int i;
47
+ for (i=0;i<M->rows;++i) {
48
+ single[i] = malloc(sizeof(NMatrix1));
49
+ single[i]->data = M->data[i];
50
+ single[i]->rows = M->cols;
51
+ }
52
+ free(M->data);
53
+ free(M);
54
+ return single;
55
+ }
56
+
57
+ // Converts an array of one dimensional arrays into a square matrix. It frees
58
+ // these arrays in the process.
59
+ NMatrix * nmatrix1_2_nmatrix(NMatrix1 **array, int size) {
60
+ if (size ==0)
61
+ return NULL;
62
+ NMatrix *result = malloc(sizeof(NMatrix));
63
+ result->data = malloc(sizeof(double*) * size);
64
+ result->rows = size;
65
+ int i;
66
+ for (i=0; i<size; ++i) {
67
+ result->data[i] = array[i]->data;
68
+ free(array[i]);
69
+ }
70
+
71
+ return result;
72
+ }
@@ -4,7 +4,7 @@
4
4
  #undef FALSE
5
5
  #define FALSE 0
6
6
 
7
- SpeechTrimmer * new_speech_trimmer() {
7
+ SpeechTrimmer * new_speech_trimmer(int frequency) {
8
8
  SpeechTrimmer *self = malloc(sizeof(SpeechTrimmer));
9
9
  self->leader = 5;
10
10
  self->trailer = 5;
@@ -16,6 +16,7 @@ SpeechTrimmer * new_speech_trimmer() {
16
16
  self->eos_reached = FALSE;
17
17
  self->scs = 20;
18
18
  self->ecs = 50;
19
+ self->seg = new_segmenter(frequency/100, frequency/100);
19
20
  return self;
20
21
  }
21
22
 
@@ -25,6 +26,33 @@ void free_speech_trimmer(SpeechTrimmer *self) {
25
26
  free(self);
26
27
  }
27
28
 
29
+ NMatrix * speech_trimmer_apply(SpeechTrimmer *self, NMatrix1* pcm) {
30
+ if (self->eos_reached)
31
+ return NULL;
32
+
33
+ NMatrix *segment_matrix = segmenter_apply(self->seg, pcm);
34
+ int centisecond_count = segment_matrix->rows;
35
+ NMatrix1 **segments = nmatrix_2_nmatrix1s(segment_matrix);
36
+ NMatrix1 ** speech_segments = malloc(sizeof(NMatrix*) * segment_matrix->rows);
37
+ int speech_count = 0, i;
38
+ for (i=0; i<centisecond_count ;++i) {
39
+ speech_trimmer_enqueue(self, segments[i]);
40
+ NMatrix1 *centispeech = speech_trimmer_dequeue(self);
41
+ while (centispeech != NULL) {
42
+ speech_segments[speech_count++] = centispeech;
43
+ centispeech = speech_trimmer_dequeue(self);
44
+ }
45
+ if (speech_trimmer_eos(self))
46
+ break;
47
+ }
48
+
49
+ if (speech_trimmer_eos(self) && speech_count == 0)
50
+ return NULL;
51
+
52
+ return nmatrix1_2_nmatrix(speech_segments, speech_count);
53
+ }
54
+
55
+
28
56
  void speech_trimmer_enqueue(SpeechTrimmer *self, NMatrix1* pcm) {
29
57
  if (self->eos_reached)
30
58
  return;
@@ -55,13 +83,14 @@ NMatrix1 * speech_trimmer_dequeue(SpeechTrimmer *self) {
55
83
  if (n_list_size(self->queue) == 0)
56
84
  return NULL;
57
85
  if (self->eos_reached || (self->speech_started &&
58
- n_list_size(self->queue) > self->ecs)) {
86
+ n_list_size(self->queue) > self->ecs)) {
59
87
  NMatrix1 * N = n_list_get(self->queue, 0);
60
88
  n_list_remove(self->queue, 0, 1);
61
89
  return N;
62
90
  }
63
91
  return NULL;
64
92
  }
93
+
65
94
  // Returns the trimmer's eos_reached flag (initialised to FALSE by
  // new_speech_trimmer).
  int speech_trimmer_eos(SpeechTrimmer *self) {
66
95
  return self->eos_reached;
67
96
  }
data/lib/c_impl/noyes.h CHANGED
@@ -30,6 +30,8 @@ void free_nmatrix(NMatrix *);
30
30
 
31
31
  NMatrix1 *new_nmatrix1(int rows);
32
32
  void free_nmatrix1(NMatrix1 *);
33
+ NMatrix1 ** nmatrix_2_nmatrix1s(NMatrix *M);
34
+ NMatrix * nmatrix1_2_nmatrix(NMatrix1 **array, int size);
33
35
 
34
36
  // Preemphasizer
35
37
  typedef struct {
@@ -126,25 +128,6 @@ LiveCMN * new_live_cmn(int dimensions, double init_mean, int window_size, int sh
126
128
  void free_live_cmn(LiveCMN *lcmn);
127
129
  NMatrix *live_cmn_apply(LiveCMN *self, NMatrix *data);
128
130
 
129
-
130
- // Fast 8k mfcc
131
- // This strings together all the algorithms necessary to make mfcc's from an 8k
132
- // signal so you don't have to.
133
- typedef struct {
134
- Preemphasizer *pre;
135
- Segmenter *seg;
136
- HammingWindow *ham;
137
- PowerSpectrum *pow;
138
- MelFilter *mel;
139
- LogCompressor *log;
140
- DiscreteCosineTransform *dct;
141
- LiveCMN *cmn;
142
- } Fast8kMfcc;
143
-
144
- Fast8kMfcc* new_fast_8k_mfcc();
145
- void free_fast_8k_mfcc(Fast8kMfcc *self);
146
- NMatrix *fast_8k_mfcc_apply(Fast8kMfcc *self, NMatrix1 *data);
147
-
148
131
  // Silence removal with BentCentMarker and SpeechTrimmer
149
132
  typedef struct {
150
133
  double adjustment;
@@ -171,15 +154,35 @@ typedef struct {
171
154
  int scs;
172
155
  int ecs;
173
156
  BentCentMarker *bcm;
157
+ Segmenter *seg;
174
158
  NList *queue;
175
159
  int eos_reached;
176
160
  } SpeechTrimmer;
177
-
161
+
178
162
  SpeechTrimmer * new_speech_trimmer();
179
163
  void free_speech_trimmer(SpeechTrimmer *self);
180
164
  void speech_trimmer_enqueue(SpeechTrimmer *self, NMatrix1* pcm);
181
165
  NMatrix1 * speech_trimmer_dequeue(SpeechTrimmer *self);
182
166
  int speech_trimmer_eos(SpeechTrimmer *self);
167
+ NMatrix * speech_trimmer_apply(SpeechTrimmer *self, NMatrix1* pcm);
168
+
169
+ // Fast 8k mfcc
170
+ // This strings together all the algorithms necessary to make mfcc's from an 8k
171
+ // signal so you don't have to.
172
+ typedef struct {
173
+ Preemphasizer *pre;
174
+ Segmenter *seg;
175
+ HammingWindow *ham;
176
+ PowerSpectrum *pow;
177
+ MelFilter *mel;
178
+ LogCompressor *log;
179
+ DiscreteCosineTransform *dct;
180
+ LiveCMN *cmn;
181
+ } Fast8kMfcc;
182
+
183
+ Fast8kMfcc* new_fast_8k_mfcc();
184
+ void free_fast_8k_mfcc(Fast8kMfcc *self);
185
+ NMatrix *fast_8k_mfcc_apply(Fast8kMfcc *self, NMatrix1 *data);
183
186
 
184
187
  #ifdef __cplusplus
185
188
  }
data/lib/c_impl/rnoyes.h CHANGED
@@ -11,6 +11,7 @@ void Init_fast_8k_mfcc();
11
11
  void Init_dct();
12
12
  void Init_bent_cent_marker();
13
13
  void Init_speech_trimmer();
14
+ void Init_n_list();
14
15
 
15
16
  VALUE nmatrix_2_v(NMatrix *d);
16
17
  NMatrix * v_2_nmatrix(VALUE value);
@@ -10,8 +10,14 @@ static void speech_trimmer_free(void *p) {
10
10
  free_speech_trimmer(p);
11
11
  }
12
12
 
13
- static VALUE t_init(VALUE self) {
14
- SpeechTrimmer *st = new_speech_trimmer();
13
+ static VALUE t_init(VALUE self, VALUE args) {
14
+ int len = RARRAY_LEN(args);
15
+ SpeechTrimmer *st;
16
+ if (len == 1)
17
+ st = new_speech_trimmer(NUM2INT(rb_ary_entry(args, 0)));
18
+ else
19
+ st = new_speech_trimmer(16000);
20
+
15
21
  VALUE stv = Data_Wrap_Struct(cSpeechTrimmer, 0, speech_trimmer_free, st);
16
22
  rb_iv_set(self, "@speech_trimmer", stv);
17
23
  return self;
@@ -38,15 +44,31 @@ static VALUE t_eos(VALUE self) {
38
44
  SpeechTrimmer *st;
39
45
  VALUE stv = rb_iv_get(self, "@speech_trimmer");
40
46
  Data_Get_Struct(stv, SpeechTrimmer, st);
41
- return speech_trimmer_dequeue(st) ? Qtrue : Qfalse;
47
+ return speech_trimmer_eos(st) ? Qtrue : Qfalse;
48
+ }
49
+
50
+ static VALUE t_left_shift(VALUE self, VALUE obj) {
51
+ NMatrix1 *M = v_2_nmatrix1(obj);
52
+ SpeechTrimmer *st;
53
+ Data_Get_Struct(rb_iv_get(self, "@speech_trimmer"), SpeechTrimmer, st);
54
+ NMatrix *R = speech_trimmer_apply(st, M);
55
+ if (!R) {
56
+ free_nmatrix1(M);
57
+ return Qnil;
58
+ }
59
+ VALUE result = nmatrix_2_v(R);
60
+ free_nmatrix1(M);
61
+ free_nmatrix(R);
62
+ return result;
42
63
  }
43
64
 
44
65
  // Defines the NoyesC::SpeechTrimmer class and registers its instance
  // methods, then caches the method id for "push".
  void Init_speech_trimmer() {
45
66
  VALUE m_noyes_c = rb_define_module("NoyesC");
46
67
  cSpeechTrimmer = rb_define_class_under(m_noyes_c, "SpeechTrimmer", rb_cObject);
47
- rb_define_method(cSpeechTrimmer, "initialize", t_init, 0);
68
  // Arity -2: all arguments arrive in t_init as a single Ruby array, which
  // lets the frequency argument be optional.
+ rb_define_method(cSpeechTrimmer, "initialize", t_init, -2);
48
69
  rb_define_method(cSpeechTrimmer, "enqueue", t_enqueue, 1);
49
70
  rb_define_method(cSpeechTrimmer, "dequeue", t_dequeue, 0);
50
71
  rb_define_method(cSpeechTrimmer, "eos?", t_eos, 0);
72
+ rb_define_method(cSpeechTrimmer, "<<", t_left_shift, 1);
51
73
  id_push = rb_intern("push");
52
74
  }
@@ -1,17 +1,27 @@
1
1
  class Array
2
- # The magic that enables the filter operator.
3
- def >> other
4
- other << self
2
+ # Run this array through a filter or anything that implements the '<<'
3
+ # operator. Returns whatever the filter returns.
4
+ def >> filter
5
+ filter << self
5
6
  end
6
7
  end
7
8
 
8
9
  # This portion is still highly experimental. It allows filters to be combined
9
10
  # in complicated ways using a syntax similar to Backus Naur Form.
10
11
  module NoyesFilterDSL
12
+ # Combines two filters into a single serial filter. That is A + B
13
+ # results in a filter S such that filtering through S is identical
14
+ # to filtering through A and then B.
11
15
  def + other
12
16
  other_filters = other.kind_of?(SerialFilter) ? other.filters.clone : other
13
17
  SerialFilter.new [self, other].flatten
14
18
  end
19
+
20
+ # Combines two filters into a single parallel filter. That is A | B creates
21
+ # a new filter P such that filtering through P is identical to filtering row
22
+ # 0 of an array through filter A and row 1 of an array through filter B.
23
+ # Typically P would be used with an array of arrays. This filter can be used
24
+ # with more than two filters.
15
25
  def | other
16
26
  other_filters = other.kind_of?(ParallelFilter) ? other.filtes.clone : other
17
27
  ParallelFilter.new [self, other].flatten
@@ -1,7 +1,11 @@
1
1
  module NoyesJava
2
2
  class SpeechTrimmer
3
- def initialize
4
- @st = Java::talkhouse.SpeechTrimmer.new
3
+ def initialize frequency = 16000
4
+ @st = Java::talkhouse.SpeechTrimmer.new frequency
5
+ end
6
+ def << pcm
7
+ result = @st.apply(pcm.to_java(Java::double))
8
+ result.to_a if result
5
9
  end
6
10
  def enqueue pcm
7
11
  @st.enqueue pcm.to_java(Java::double)
@@ -1,7 +1,9 @@
1
1
  module Noyes
2
2
  # Determines whether a PCM frame is speech or not using Bent
3
3
  # Schmidt-Nielsen's algorithm. Basically, it's an energy-based detector
4
- # where the background noise level is constantly estimated.
4
+ # where the background noise level is constantly estimated. You probably
5
+ # don't want to use this class directly. Most of the time you'll want
6
+ # to use SpeechTrimmer, which uses this class.
5
7
  #
6
8
  # The pcm data should be in 10 millisecond (one centisecond) chunks. For example,
7
9
  # at 8000 Hz there should be 80 frames of pcm.
@@ -14,6 +16,8 @@ module Noyes
14
16
  @min_signal = 0.0
15
17
  @threshold = 10.0
16
18
  end
19
+
20
+ # Take the log rms of an array of pcm values.
17
21
  def logrms pcm
18
22
  sum_of_squares = 0.0
19
23
  pcm.each {|sample| sum_of_squares += sample * sample}
@@ -21,6 +25,9 @@ module Noyes
21
25
  rms = Math.max rms, 1
22
26
  Math.log(rms) * 20
23
27
  end
28
+
29
+ # Takes a centisecond worth of pcm values and indicates whether it looks
30
+ # like speech. This information is typically used by SpeechTrimmer.
24
31
  def << pcm
25
32
  is_speech = false
26
33
  current = logrms pcm
@@ -4,7 +4,7 @@ module Noyes
4
4
  # returns nil. Then check for eos. If eos is true you are done.
5
5
  # SpeechTrimmer is designed to work efficiently with live audio.
6
6
  class SpeechTrimmer
7
- def initialize
7
+ def initialize frequency=16000
8
8
  @leader = 5 # Cents of leading silence to retain.
9
9
  @trailer = 5 # Cents of trailing silence to retain.
10
10
  @speech_started = false
@@ -15,6 +15,19 @@ module Noyes
15
15
  @eos_reached = false
16
16
  @scs = 20 # Centiseconds of speech before detection of utterance.
17
17
  @ecs = 50 # Centiseconds of silence before end detection.
18
+ @segmenter = Segmenter.new(frequency/100, frequency/100)
19
+ end
20
+
21
+ def << pcm
22
+ return if eos?
23
+ (@segmenter << pcm).inject [] do |memo, centisec|
24
+ enqueue centisec unless eos?
25
+ while x = dequeue
26
+ memo << x
27
+ end
28
+ break memo if eos?
29
+ memo
30
+ end
18
31
  end
19
32
 
20
33
  def enqueue pcm
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: noyes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Woelfel
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-06-22 00:00:00 -04:00
12
+ date: 2010-06-30 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -22,7 +22,9 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: 1.0.0
24
24
  version:
25
- description: Currently sufficient to create features for speech recognition
25
+ description: |-
26
+ A fast portable signal processing library sufficient for creating features for
27
+ speech recognition, etc.
26
28
  email: joe@talkhouse.com
27
29
  executables:
28
30
  - mock_noyes_server
@@ -34,7 +36,6 @@ extensions:
34
36
  extra_rdoc_files:
35
37
  - COPYING
36
38
  - FAQ
37
- - README
38
39
  files:
39
40
  - VERSION
40
41
  - lib/c_impl/array_list.c
@@ -108,10 +109,8 @@ files:
108
109
  - lib/ruby_impl/preemphasis.rb
109
110
  - lib/ruby_impl/segment.rb
110
111
  - lib/ruby_impl/speech_trimmer.rb
111
- - ship/noyes.jar
112
112
  - COPYING
113
113
  - FAQ
114
- - README
115
114
  has_rdoc: true
116
115
  homepage: http://github.com/talkhouse/noyes
117
116
  licenses: []
data/README DELETED
@@ -1,171 +0,0 @@
1
- Noyes is a signal processing library. It currently has just enough signal
2
- processing to produce features suitable for speech recognition.
3
-
4
- Pronunciation: Typically pronounced the same as 'noise'. But "NO!... YES!" is
5
- considered acceptable if you say it with sufficient conviction to make people
6
- think you have truly changed your mind.
7
-
8
- Noyes is a general purpose signal processing tool that is flexible enough for
9
- many purposes. However, it exists because there is a need for low-latency high
10
- quality speech recognition on portable wireless devices. The most powerful
11
- speech recognizers are very large with huge models running on powerful cloud
12
- based systems. But transmitting raw audio to these recognizers creates too
13
- much latency because raw audio uses too much bandwidth. By sending compressed
14
- features instead of raw audio the bandwidth can be greatly reduced without
15
- compromising recognition accuracy. In some cases the effect of inadequate
16
- bandwidth on latency can be reduced to zero.
17
-
18
- Because hand sets require different implementations the Noyes library is
19
- designed to quickly and efficiently work with and develop multiple underlying
20
- implementations. All implementations are accessible via a high level dynamic
21
- language that includes a very expressive domain specific language for handling
22
- signal processing routines. In addition, all implementations share unit tests
23
- written in a high level dynamic language.
24
-
25
- Noyes is implemented entirely in Ruby. It's also implemented entirely in Java.
26
- The Java version has Ruby bindings too. So you can have Java's speed from
27
- Ruby. If you need a pure Java version you can use the generated jar. There is
28
- a lot of flexibility without a lot of overhead. All versions share the same
29
- unit tests, which are written in Ruby.
30
-
31
- The design goal is to have signal processing routines that are so simple and so
32
- disentangled from the overall system that anyone could extract any of the
33
- routines and use them elsewhere with little trouble. Benchmarks are included.
34
-
35
- This library places an emphasis on expressiveness without sacrificing ultimate
36
- performance. It does so by supporting multiple implementations each with Ruby
37
- bindings. The pure Ruby version, while not fast, is often adequate for
38
- development and is the best place to add new routines.
39
-
40
- For examples of how to link with different implementations see the test section
41
- of the Rakefile. At present only the pure Ruby implementation is exposed via
42
- the gem.
43
-
44
- Requirements:
45
- Almost any version of ruby & rake.
46
- Java, if you want to use the Java implementation instead of the default pure
47
- ruby implementation.
48
-
49
- Some of the utility scripts such as nrec and jrec may use sox, but
50
- none of the core routines use it.
51
-
52
- Build instructions
53
- rake -T
54
-
55
-
56
- = USAGE
57
-
58
- All signal processing routines use a simple DSL style interface. Below are some
59
- examples.
60
-
61
- == Filter operator example.
62
- The '>>=' operator is called the filter operator. It modifies the data on the
63
- left using the filter on the right. This is similar to the way the += operator
64
- works for numbers. Note that the >>= actually looks like a filter making it easy
65
- to remember.
66
-
67
- require 'noyes'
68
- data = (1..12).to_a # An array of nonsense data.
69
- segmenter = Segmenter.new 4, 2 # window size, window shift
70
- hamming_filter = HammingWindow.new 4 # window size
71
- power_spec_filter = PowerSpectrumFilter.new 8 # number of ffts
72
-
73
- data >>= segmenter
74
- data >>= hamming_filter
75
- data >>= power_spec_filter
76
- data >>= dct_filter
77
-
78
- You can expand the >>= operator out, but I think the flow is worse and there is
79
- more repetition, particularly when you have a lot of filters in sequence. This
80
- is perfectly valid syntax though. Also, this is very useful if you don't want
81
- to keep a reference to your original data.
82
-
83
- require 'noyes'
84
- pcm_data = (1..12).to_a
85
- segmenter = Segmenter.new
86
- hamming_filter = HammingWindow.new 4
87
- segmented_data = segmenter << pcm_data, 4, 2
88
- hamming_data = hamming_filter << segmented_data
89
- power_spectrum_data = power_spec_filter hamming_data, 8
90
- dct_data = dct_filter << power_spectrum_data
91
-
92
- == Advanced filter DSLs
93
- For most things, the filter operator is simple, easy to remember, and
94
- very concise. But sometimes you want to build more elaborate combinations
95
- of filters and use them as if you had a single filter. In this case
96
- making a new class for every possible combination creates an explosion
97
- of new classes and a maintenance nightmare. Instead, there is a simple
98
- graph notation you can use to combine filters. In the following example
99
- we'll combine all the filters from a previous example and then use them
100
- as if they were a single filter.
101
-
102
- serial_filter = segmenter & hamming_filter & power_spec_filter & dct_filter
103
- data >>= serial_filter
104
-
105
- It's also possible to take parallel data streams and pipe them through
106
- parallel filters as if you had only one data stream and only one filter.
107
-
108
- data = [stream_1,stream_2]
109
- parallel_filter = filter_1 | filter_2
110
- data >>= parallel_filter
111
-
112
- It is not necessary for the data to be synchronous when using parallel filters.
113
- When using parallel filters the number of elements going through one filter
114
- does not have to equal the number of elements going through the second filter.
115
-
116
- You can see that you can make arbitrarily complicated graphs of filters by
117
- combined use of the '&' and '|' operators. Almost identical notation is used
118
- to specify graphs for context free grammars. Keep in mind that '&' takes
119
- precedence over '|'. In the example below stream 1 goes through filter 1 and
120
- filter 2 while stream 2 goes through filters 3, 4, and 5.
121
-
122
- parallel_data = [stream_1,stream_2]
123
- big_filter = filter_1 & filter_2 | filter_3 & filter_4 & filter_5
124
- parallel_data >>= big_filter
125
-
126
- == Command Line Utilities
127
-
128
- The program nrec will process almost any format of audio file into speech
129
- features and send the data to a cloud hosted speech recognizer. The resulting
130
- transcript will be sent back and printed out. The nrec program uses whatever
131
- version of Ruby is on the path of your current environment. It is compatible
132
- with both ruby 1.9, ruby 1.8x, and JRuby. When run under JRuby it can
133
- optionally use a Java implementation, which is very fast. See nrec --help for
134
- more information.
135
-
136
- == Assessing Performance for Wireless Devices
137
-
138
- It's important to note that the performance characteristics of live data and
139
- recorded data are different. Any delay experienced by a user starts from the
140
- time they stop speaking. In contrast, any delay experienced when processing a
141
- file starts from the time a file starts processing. For that reason file
142
- processing always seems slower. Modern recognizers are easily capable of
143
- exceeding real time performance so that is not a factor. The delay experienced
144
- by a user is typically due to the time required to transmit the audio to the
145
- recognizer and the time required to detect end of utterance, assuming end of
146
- utterance detection is used.
147
-
148
- If end of utterance detection is used the recognizer must wait until it has
149
- sufficient evidence to be reasonably sure the user has stopped talking. This
150
- could mean that a suitable period of silence has passed which means the user
151
- incurs a slight but unavoidable delay. End of utterance detection also could
152
- mean the grammar or language model does not allow for any other reasonable
153
- possibility even if more data were available, which may mean no delay at all
154
- (or even a negative delay in some cases).
155
-
156
- If the bandwidth of the network is low enough, which is often the case for the
157
- data channel of portable wireless handsets, it will take time for raw
158
- uncompressed audio to traverse the network. By computing features on the
159
- handset it is possible to have significant reduction in bandwidth requirements
160
- eliminating much of the latency. These features in turn may then be compressed
161
- for further bandwidth reduction. This method exceeds what is possible with
162
- alternative methods of audio compression. Further, it eliminates many of the
163
- distortion components that may compromise recognition accuracy.
164
-
165
- If all you want is a rough feeling of how responsive speech recognition will be
166
- over your network try speaking an utterance at the same time you enter a
167
- command to have a prerecorded utterance recognized. You'll probably be
168
- surprised by how quickly the network is able to respond. You may find that the
169
- Java implementation feels like instant response even though it takes time for
170
- the JVM to launch. Ruby 1.9 is actually surprisingly quick on a reasonably
171
- powerful laptop.
data/ship/noyes.jar DELETED
Binary file