RubyGems - noyes - Versions diffs - 0.9.0 → 0.9.2 - Mend

noyes 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

data/VERSION +1 -1
data/lib/c_impl/n_matrix.c +32 -0
data/lib/c_impl/n_speech_trimmer.c +31 -2
data/lib/c_impl/noyes.h +23 -20
data/lib/c_impl/rnoyes.h +1 -0
data/lib/c_impl/speech_trimmer.c +26 -4
data/lib/common/noyes_dsl.rb +13 -3
data/lib/java_impl/speech_trimmer.rb +6 -2
data/lib/ruby_impl/bent_cent_marker.rb +8 -1
data/lib/ruby_impl/speech_trimmer.rb +14 -1
metadata +5 -6
data/README +0 -171
data/ship/noyes.jar +0 -0

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.9.0
1	+ 0.9.2

data/lib/c_impl/n_matrix.c CHANGED Viewed

@@ -38,3 +38,35 @@ void free_nmatrix1(NMatrix1 *M) {
     free(M);
   }
 }
+// Splits a two dimensional matrix into an array of M->rows one dimensional
+// matrices, one per row.  Each row's data buffer is handed over as is (no
+// copy).  The row pointer table and the matrix struct are freed, so M must
+// not be used after this call.
+NMatrix1 ** nmatrix_2_nmatrix1s(NMatrix *M) {
+  NMatrix1 **rows = malloc(sizeof(NMatrix1*) * M->rows);
+  int r = 0;
+  while (r < M->rows) {
+    NMatrix1 *row = malloc(sizeof(NMatrix1));
+    row->data = M->data[r];
+    row->rows = M->cols;  // A row has as many entries as M has columns.
+    rows[r++] = row;
+  }
+  free(M->data);
+  free(M);
+  return rows;
+}
+// Packs an array of `size` one dimensional matrices into a single two
+// dimensional matrix with one row per input.  The data buffers are moved, not
+// copied, and each NMatrix1 struct is freed in the process.  The `array`
+// container itself is not freed here; releasing it is left to the caller.
+// All inputs are expected to have the same length; the column count is taken
+// from the first one.  Returns NULL when there is nothing to pack.
+NMatrix * nmatrix1_2_nmatrix(NMatrix1 **array, int size) {
+  if (array == NULL || size <= 0)
+    return NULL;
+  NMatrix *result = malloc(sizeof(NMatrix));
+  result->data = malloc(sizeof(double*) * size);
+  result->rows = size;
+  // malloc does not zero the struct, so cols has to be set explicitly.
+  result->cols = array[0]->rows;
+  int i;
+  for (i=0; i<size; ++i) {
+    result->data[i] = array[i]->data;
+    free(array[i]);
+  }
+  return result;
+}

data/lib/c_impl/n_speech_trimmer.c CHANGED Viewed

@@ -4,7 +4,7 @@
 #undef FALSE
 #define FALSE 0
-SpeechTrimmer * new_speech_trimmer() {
+SpeechTrimmer * new_speech_trimmer(int frequency) {
   SpeechTrimmer *self = malloc(sizeof(SpeechTrimmer));
   self->leader = 5;
   self->trailer = 5;
@@ -16,6 +16,7 @@ SpeechTrimmer * new_speech_trimmer() {
   self->eos_reached = FALSE;
   self->scs = 20;
   self->ecs = 50;
+  self->seg = new_segmenter(frequency/100, frequency/100);
   return self;
 }
@@ -25,6 +26,33 @@ void free_speech_trimmer(SpeechTrimmer *self) {
   free(self);
 }
+// Segments pcm into centisecond frames, feeds them through the trimmer and
+// returns whatever frames the trimmer released, one frame per row.  Returns
+// NULL when end of speech had already been reached, or when no frame was
+// released by this call.  pcm is only read here; the caller still owns it.
+// The returned matrix is owned by the caller.
+NMatrix * speech_trimmer_apply(SpeechTrimmer *self, NMatrix1* pcm) {
+  if (self->eos_reached)
+    return NULL;
+  NMatrix *segment_matrix = segmenter_apply(self->seg, pcm);
+  if (segment_matrix == NULL)
+    return NULL;
+  // Take the row count now: nmatrix_2_nmatrix1s frees segment_matrix, so it
+  // must not be dereferenced afterwards.
+  int centisecond_count = segment_matrix->rows;
+  NMatrix1 **segments = nmatrix_2_nmatrix1s(segment_matrix);
+  // Dequeue can hand back frames queued by earlier calls, so the output may
+  // hold more than centisecond_count frames.  It can never hold more than
+  // what is already queued plus what this call enqueues.
+  // NOTE(review): assumes enqueue adds at most one entry per call - confirm.
+  int capacity = n_list_size(self->queue) + centisecond_count;
+  NMatrix1 **speech_segments = malloc(sizeof(NMatrix1*) * capacity);
+  int speech_count = 0, i;
+  for (i=0; i<centisecond_count; ++i) {
+    speech_trimmer_enqueue(self, segments[i]);
+    NMatrix1 *centispeech = speech_trimmer_dequeue(self);
+    while (centispeech != NULL) {
+      speech_segments[speech_count++] = centispeech;
+      centispeech = speech_trimmer_dequeue(self);
+    }
+    if (speech_trimmer_eos(self)) {
+      ++i;  // Segment i was handed to the trimmer; the rest were not.
+      break;
+    }
+  }
+  // Segments after end of speech were never enqueued, so nobody else owns
+  // them.
+  for (; i<centisecond_count; ++i)
+    free_nmatrix1(segments[i]);
+  free(segments);
+  NMatrix *result = NULL;
+  if (speech_count > 0)
+    result = nmatrix1_2_nmatrix(speech_segments, speech_count);
+  // nmatrix1_2_nmatrix releases the entries but not the table holding them.
+  free(speech_segments);
+  return result;
+}
 void speech_trimmer_enqueue(SpeechTrimmer *self, NMatrix1* pcm) {
   if (self->eos_reached)
     return;
@@ -55,13 +83,14 @@ NMatrix1 * speech_trimmer_dequeue(SpeechTrimmer *self) {
   if (n_list_size(self->queue) == 0)
     return NULL;
   if (self->eos_reached || (self->speech_started &&
-      n_list_size(self->queue) > self->ecs)) {
+    n_list_size(self->queue) > self->ecs)) {
     NMatrix1 * N = n_list_get(self->queue, 0);
     n_list_remove(self->queue, 0, 1);
     return N;
   }
   return NULL;
 }
 int speech_trimmer_eos(SpeechTrimmer *self) {
   return self->eos_reached;
 }

data/lib/c_impl/noyes.h CHANGED Viewed

@@ -30,6 +30,8 @@ void free_nmatrix(NMatrix *);
 NMatrix1 *new_nmatrix1(int rows);
 void free_nmatrix1(NMatrix1 *);
+NMatrix1 ** nmatrix_2_nmatrix1s(NMatrix *M);
+NMatrix * nmatrix1_2_nmatrix(NMatrix1 **array, int size);
 // Preemphasizer
 typedef struct {
@@ -126,25 +128,6 @@ LiveCMN * new_live_cmn(int dimensions, double init_mean, int window_size, int sh
 void free_live_cmn(LiveCMN *lcmn);
 NMatrix *live_cmn_apply(LiveCMN *self, NMatrix *data);
-// Fast 8k mfcc
-// This strings together all the algorithms necessary to make mfcc's from an 8k
-// signal so you don't have to.
-typedef struct {
-  Preemphasizer *pre;
-  Segmenter *seg;
-  HammingWindow *ham;
-  PowerSpectrum *pow;
-  MelFilter *mel;
-  LogCompressor *log;
-  DiscreteCosineTransform *dct;
-  LiveCMN *cmn;
-} Fast8kMfcc;
-Fast8kMfcc* new_fast_8k_mfcc();
-void free_fast_8k_mfcc(Fast8kMfcc *self);
-NMatrix *fast_8k_mfcc_apply(Fast8kMfcc *self, NMatrix1 *data);
 // Silence removal with BentCentMarker and SpeechTrimmer
 typedef struct {
   double adjustment;
@@ -171,15 +154,35 @@ typedef struct {
   int scs;
   int ecs;
   BentCentMarker *bcm;
+  Segmenter *seg;
   NList *queue;
   int eos_reached;
 } SpeechTrimmer;
 // Creates a trimmer for audio sampled at `frequency` Hz.
 SpeechTrimmer * new_speech_trimmer(int frequency);
 void free_speech_trimmer(SpeechTrimmer *self);
 void speech_trimmer_enqueue(SpeechTrimmer *self, NMatrix1* pcm);
 NMatrix1 * speech_trimmer_dequeue(SpeechTrimmer *self);
 int speech_trimmer_eos(SpeechTrimmer *self);
+NMatrix * speech_trimmer_apply(SpeechTrimmer *self, NMatrix1* pcm);
+// Fast 8k mfcc
+// This strings together all the algorithms necessary to make mfcc's from an 8k
+// signal so you don't have to.
+typedef struct {
+  Preemphasizer *pre;
+  Segmenter *seg;
+  HammingWindow *ham;
+  PowerSpectrum *pow;
+  MelFilter *mel;
+  LogCompressor *log;
+  DiscreteCosineTransform *dct;
+  LiveCMN *cmn;
+} Fast8kMfcc;
+Fast8kMfcc* new_fast_8k_mfcc();
+void free_fast_8k_mfcc(Fast8kMfcc *self);
+NMatrix *fast_8k_mfcc_apply(Fast8kMfcc *self, NMatrix1 *data);
 #ifdef __cplusplus
 }

data/lib/c_impl/rnoyes.h CHANGED Viewed

@@ -11,6 +11,7 @@ void Init_fast_8k_mfcc();
 void Init_dct();
 void Init_bent_cent_marker();
 void Init_speech_trimmer();
+void Init_n_list();
 VALUE nmatrix_2_v(NMatrix *d);
 NMatrix * v_2_nmatrix(VALUE value);

data/lib/c_impl/speech_trimmer.c CHANGED Viewed

@@ -10,8 +10,14 @@ static void speech_trimmer_free(void *p) {
   free_speech_trimmer(p);
 }
-static VALUE t_init(VALUE self) {
-  SpeechTrimmer *st = new_speech_trimmer();
+static VALUE t_init(VALUE self, VALUE args) {
+  int len = RARRAY_LEN(args);
+  SpeechTrimmer *st;
+  if (len == 1)
+    st = new_speech_trimmer(NUM2INT(rb_ary_entry(args, 0)));
+  else
+    st = new_speech_trimmer(16000);
   VALUE stv = Data_Wrap_Struct(cSpeechTrimmer, 0, speech_trimmer_free, st);
   rb_iv_set(self, "@speech_trimmer", stv);
   return self;
@@ -38,15 +44,31 @@ static VALUE t_eos(VALUE self) {
   SpeechTrimmer *st;
   VALUE stv = rb_iv_get(self, "@speech_trimmer");
   Data_Get_Struct(stv, SpeechTrimmer, st);
-  return speech_trimmer_dequeue(st) ? Qtrue : Qfalse;
+  return speech_trimmer_eos(st) ? Qtrue : Qfalse;
+}
+// Ruby binding for SpeechTrimmer#<<.  Converts obj to an NMatrix1, runs it
+// through speech_trimmer_apply and returns the result converted back to a
+// Ruby value, or nil when the trimmer produced nothing.  Both temporary C
+// matrices are released before returning.
+static VALUE t_left_shift(VALUE self, VALUE obj) {
+  NMatrix1 *M = v_2_nmatrix1(obj);
+  SpeechTrimmer *st;
+  VALUE stv = rb_iv_get(self, "@speech_trimmer");
+  Data_Get_Struct(stv, SpeechTrimmer, st);
+  NMatrix *R = speech_trimmer_apply(st, M);
+  VALUE result = Qnil;
+  if (R) {
+    result = nmatrix_2_v(R);
+    free_nmatrix(R);
+  }
+  free_nmatrix1(M);
+  return result;
 }
 void Init_speech_trimmer() {
   VALUE m_noyes_c = rb_define_module("NoyesC");
   cSpeechTrimmer = rb_define_class_under(m_noyes_c, "SpeechTrimmer", rb_cObject);
-  rb_define_method(cSpeechTrimmer, "initialize", t_init, 0);
+  rb_define_method(cSpeechTrimmer, "initialize", t_init, -2);
   rb_define_method(cSpeechTrimmer, "enqueue", t_enqueue, 1);
   rb_define_method(cSpeechTrimmer, "dequeue", t_dequeue, 0);
   rb_define_method(cSpeechTrimmer, "eos?", t_eos, 0);
+  rb_define_method(cSpeechTrimmer, "<<", t_left_shift, 1);
   id_push = rb_intern("push");
 }

data/lib/common/noyes_dsl.rb CHANGED Viewed

@@ -1,17 +1,27 @@
 class Array
-  # The magic that enables the filter operator.
-  def >> other
-    other << self
+  # Run this array through a filter or anything that implements the '<<'
+  # operator.  Returns whatever the filter returns.
+  def >> filter
+    filter << self
   end
 end
 # This portion is still highly experimental.  It allows filters to be combined
 # in complicated ways using a syntax similar to Backus Naur Form.
 module NoyesFilterDSL
+  # Combines two filters into a single serial filter.  That is A + B
+  # results in a filter S such that filtering through S is identical
+  # to filtering through A and then B.
   def + other
     other_filters = other.kind_of?(SerialFilter) ? other.filters.clone : other
     SerialFilter.new [self, other].flatten
   end
+  # Combines two filters into a single parallel filter.  That is A | B creates
+  # a new filter P such that filtering through P is identical to filtering row
+  # 0 of an array through filter A and row 1 of an array through filter B.
+  # Typically P would be used with an array of arrays.  This filter can be used
+  # with more than two filters.
   def | other
      other_filters = other.kind_of?(ParallelFilter) ? other.filtes.clone : other
      ParallelFilter.new [self, other].flatten

data/lib/java_impl/speech_trimmer.rb CHANGED Viewed

@@ -1,7 +1,11 @@
 module NoyesJava
   class SpeechTrimmer
-    def initialize
-      @st = Java::talkhouse.SpeechTrimmer.new
+    def initialize frequency = 16000
+      @st = Java::talkhouse.SpeechTrimmer.new frequency
+    end
+    def << pcm
+      result = @st.apply(pcm.to_java(Java::double))
+      result.to_a if result
     end
     def enqueue pcm
       @st.enqueue pcm.to_java(Java::double)

data/lib/ruby_impl/bent_cent_marker.rb CHANGED Viewed

@@ -1,7 +1,9 @@
 module Noyes
   # Determines whether a PCM frame is speech or not using Bent
   # Schmidt-Nielsen's algorithm.  Basically, it's an energy-based detector
-  # where the background noise level is constantly estimated.
+  # where the background noise level is constantly estimated.  You probably
+  # don't want to use this class directly.  Most of the time you'll want
+  # to use SpeechTrimmer, which uses this class.
   #
   # The pcm data should be in 10 millisecond (one centisecond) chunks.  For
   # example, at 8000 Hz there should be 80 frames of pcm.
@@ -14,6 +16,8 @@ module Noyes
       @min_signal = 0.0
       @threshold = 10.0
     end
+    # Take the log rms of an array of pcm values.
     def logrms pcm
       sum_of_squares = 0.0
       pcm.each {|sample| sum_of_squares += sample * sample}
@@ -21,6 +25,9 @@ module Noyes
       rms = Math.max rms, 1
       Math.log(rms) * 20
     end
+    # Takes a centisecond worth of pcm values and indicates whether it looks
+    # like speech.  This information is typically used by SpeechTrimmer.
     def << pcm
       is_speech = false
       current = logrms pcm

data/lib/ruby_impl/speech_trimmer.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Noyes
   # returns nil.  Then check for eos.  If eos is true you are done.
   # SpeechTrimmer is designed to work efficiently with live audio.
   class SpeechTrimmer
-    def initialize
+    def initialize frequency=16000
       @leader = 5  # Cents of leading silence to retain.
       @trailer = 5  # Cents of trailing silence to retain.
       @speech_started = false
@@ -15,6 +15,19 @@ module Noyes
       @eos_reached = false
       @scs = 20 # Centiseconds of speech before detection of utterance.
       @ecs = 50 # Centiseconds of silence before end detection.
+      @segmenter = Segmenter.new(frequency/100, frequency/100)
+    end
+    # Segments pcm and runs each centisecond through the trimmer.  Returns
+    # the frames released by the trimmer (possibly an empty array), or nil if
+    # end of speech had already been reached before the call.
+    def << pcm
+      return if eos?
+      trimmed = []
+      (@segmenter << pcm).each do |centisec|
+        enqueue centisec unless eos?
+        while frame = dequeue
+          trimmed << frame
+        end
+        break if eos?
+      end
+      trimmed
     end
     def enqueue pcm

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: noyes
 version: !ruby/object:Gem::Version
-  version: 0.9.0
+  version: 0.9.2
 platform: ruby
 authors:
 - Joe Woelfel
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-06-22 00:00:00 -04:00
+date: 2010-06-30 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -22,7 +22,9 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 1.0.0
     version:
-description: Currently sufficient to create features for speech recognition
+description: |-
+  A fast portable signal processing library sufficient for creating features for
+   speech recognition, etc.
 email: joe@talkhouse.com
 executables:
 - mock_noyes_server
@@ -34,7 +36,6 @@ extensions:
 extra_rdoc_files:
 - COPYING
 - FAQ
-- README
 files:
 - VERSION
 - lib/c_impl/array_list.c
@@ -108,10 +109,8 @@ files:
 - lib/ruby_impl/preemphasis.rb
 - lib/ruby_impl/segment.rb
 - lib/ruby_impl/speech_trimmer.rb
-- ship/noyes.jar
 - COPYING
 - FAQ
-- README
 has_rdoc: true
 homepage: http://github.com/talkhouse/noyes
 licenses: []

data/README DELETED Viewed

@@ -1,171 +0,0 @@
-Noyes is a signal processing library.  It currently has just enough signal
-processing to produce features suitable for speech recognition.
-Pronunciation:  Typically pronounced the same as 'noise'.  But "NO!... YES!" is
-considered acceptable if you say it with sufficient conviction to make people
-think you have truly changed your mind.
-Noyes is a general purpose signal processing tool that is flexible enough for
-many purposes.  However, it exists because there is a need for low-latency high
-quality speech recognition on portable wireless devices.  The most powerful
-speech recognizers are very large with huge models running on powerful cloud
-based systems.  But transmitting raw audio to these recognizers creates too
-much latency because raw audio uses too much bandwidth.  By sending compressed
-features instead of raw audio the bandwidth can be greatly reduced without
-compromising recognition accuracy.  In some cases the effect of inadequate
-bandwidth on latency can be reduced to zero.
-Because hand sets require different implementations the Noyes library is
-designed to quickly and efficiently work with and develop multiple underlying
-implementations.  All implementations are accessible via a high level dynamic
-language that includes a very expressive domain specific language for handling
-signal processing routines.  In addition, all implementations share unit tests
-written in a high level dynamic language.
-Noyes is implemented entirely in Ruby.  It's also implemented entirely in Java.
-The Java version has Ruby bindings too.  So you can have Java's speed from
-Ruby.  If you need a pure Java version you can use the generated jar.  There is
-a lot of flexibility without a lot of overhead.  All versions share the same
-unit tests, which are written in Ruby.
-The design goal is to have signal processing routines that are so simple and so
-disentangled from the overall system that anyone could extract any of the
-routines and use them elsewhere with little trouble.  Benchmarks are included.
-This library places an emphasis on expressiveness without sacrificing ultimate
-performance.  It does so by supporting multiple implementations each with Ruby
-bindings.  The pure Ruby version, while not fast, is often adequate for
-development and is the best place to add new routines.
-For examples of how to link with different implementations see the test section
-of the Rakefile.  At present only the pure Ruby implementation is exposed via
-the gem.
-Requirements:
-  Almost any version of ruby & rake.
-  Java, if you want to use the Java implementation instead of the default pure
-  ruby implementation.
-  Some of the utility scripts such as nrec and jrec may use sox, but
-  none of the core routines use it.
-Build instructions
-  rake -T
-= USAGE
-All signal processing routines use a simple DSL style interface.  Below are some
-examples.
-== Filter operator example.
-The '>>=' operator is called the filter operator.  It modifies that data on the
-left using the filter on the right.  This is similar to the way the += operator
-works for numbers. Note that the >>= actually looks like a filter making it easy
-to remember.
-  require 'noyes'
-  data = (1..12).to_a  # An array of nonesense data.
-  segmenter = Segmenter.new 4, 2 # window size, window shift
-  hamming_filter = HammingWindow.new 4 # window size
-  power_spec_filter = PowerSpectrumFilter.new 8 # number of ffts
-  data >>= segmenter
-  data >>= hamming_filter
-  data >>= power_spec_filter
-  data >>= dct_filter
-You can expand the >>= operator out, but I think the flow is worse and there is
-more repetition, particularly when you have a lot of filters in sequence.  This
-is perfectly valid syntax though. Also, this is very useful if you don't want
-to keep a reference to your original data.
-  require 'noyes'
-  pcm_data = (1..12).to_a
-  segmenter = Segmenter.new
-  hamming_filter = HammingWindow.new 4
-  segmented_data = segmenter << pcm_data, 4, 2
-  hamming_data = hamming_filter <<  segmented_data
-  power_spectrum data = power_spec_filter hamminging_data, 8
-  dct_data = dct_filter << power_spectrum_data
-== Advanced filter DSLs
-For most things, the filter operator is simple, easy to remember, and
-very concise.  But sometimes you want to build more elaborate combinations
-of filters and use them as if you had a single filter.  In this case
-making a new class for every possible combination creates an explosion
-of new classes and a maintenance nightmare.  Instead, there is a simple
-graph notation you can use to combine filters.  In the following example
-we'll combine all the filters from a previous example and then use them
-as if they were a single filter.
-  serial_filter = segmenter & hamming_filter & power_spec_filter & dct_filter
-  data >>= serial_filter
-It's also possible to take parallel data streams and pipe them through
-parallel filters as if you had only one data stream and only one filter.
-  data = [stream_1,stream_2]
-  parallel_filter = filter_1 | filter_2
-  data >>= parallel_filter
-It is not necessary for the data to be synchronous when using parallel filters.
-When using parallel filters the number of elements going through one filter
-does not have to equal the number of elements going through the second filter.
-You can see that you can make arbitrarily complicated graphs of filters by
-combined use of the '&' and '|' operators.  Almost identical notation is used
-to specify graphs for context free grammars.  Keep in mind that '&' take
-precedence over '|'.  In the example below stream 1 goes through filter 1 and
-filter 2 while stream 2 goes through filters 3, 4, and 5.
-  parallel_data = [stream_1,stream_2]
-  big_filter = filter_1 & filter_2 | filter_3 & filter_4 & filter_5
-  parallel_data >>= big_filter
-== Command Line Utilities
-The program nrec will process almost any format of audio file into speech
-features and send the data to a cloud hosted speech recognizer.  The resulting
-transcript will be sent back and printed out.  The nrec program uses whatever
-version of Ruby is on the path of your current environment.  It is compatible
-with both ruby 1.9, ruby 1.8x, and JRuby.  When run under JRuby it can
-optionally use a Java implementation, which is very fast.  See nrec --help for
-more information.
-== Assessing Performance for Wireless Devices
-It's important to note that the performance characteristics of live data and
-recorded data are different.   Any delay experience by a user starts from the
-time they stop speaking.  In contrast, any delay experienced when processing a
-file starts from the time a file starts processing.  For that reason file
-processing always seems slower.  Modern recognizers are easily capable of
-exceeding real time performance so that it not a factor.  The delay experienced
-by a user is typically due to the time required to transmit the audio to the
-recognizer and the time required to detect end of utterance, assuming end of
-utterance detection is used.
-If end of utterance detection is used the recognizer must wait until it has
-sufficient evidence to be reasonably sure the user has stopped talking.  This
-could mean that a suitable period of silence has passed which means the user
-incurs a slight but unavoidable delay.  End of utterance detection also could
-mean the grammar or language model does not allow for any other reasonable
-possibility even if more data were available, which may mean no delay at all
-(or even a negative delay in some cases).
-If the bandwidth of the network is low enough, which is often the case for the
-data channel of portable wireless handsets, it will take time for raw
-uncompressed audio to traverse the network.   By computing features on the
-handset it is possible to have significant reduction in bandwidth requirements
-eliminating much of the latency.  These features in turn may then be compressed
-for further bandwidth reduction.  This method exceeds what is possible with
-alternative methods of audio compression.  Further, it eliminates many of the
-distortion components that may compromise recognition accuracy.
-If all you want is a rough feeling of how responsive speech recognition will be
-over your network try speaking an utterance at the same time you enter a
-command to have a prerecorded utterance recognized.  You'll probably be
-surprised by how quickly the network is able to respond.  You may find that the
-Java implementation feels like instant response even though it takes time for
-the JVM to launch.  Ruby 1.9 is actually surprisingly quick on a reasonably
-powerful laptop.

data/ship/noyes.jar DELETED Viewed

Binary file