RubyGems - pocketsphinx-ruby - Versions diffs - 0.2.0 → 0.3.0 - Mend

pocketsphinx-ruby 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

checksums.yaml +4 -4
data/.travis.yml +3 -2
data/CHANGELOG.md +7 -0
data/README.md +24 -4
data/examples/decode_audio_file.rb +1 -1
data/examples/record_audio_file.rb +1 -1
data/lib/pocketsphinx/api/pocketsphinx.rb +15 -1
data/lib/pocketsphinx/audio_file.rb +1 -1
data/lib/pocketsphinx/audio_file_speech_recognizer.rb +1 -1
data/lib/pocketsphinx/decoder.rb +40 -1
data/lib/pocketsphinx/microphone.rb +2 -2
data/lib/pocketsphinx/speech_recognizer.rb +1 -1
data/lib/pocketsphinx/version.rb +1 -1
data/spec/assets/audio/hello.wav +0 -0
data/spec/configuration_spec.rb +2 -2
data/spec/decoder_spec.rb +47 -7
data/spec/integration/decoder_spec.rb +25 -3
data/spec/integration/default_recognition_spec.rb +1 -1
data/spec/integration/grammar_recognition_spec.rb +1 -1
data/spec/integration/keyword_recognition_spec.rb +1 -1
data/spec/microphone_spec.rb +4 -4
metadata +4 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3b38998dd59c577300db4b63bfda52e037c49409
-  data.tar.gz: 82dd10ee0f739d6459013513ae986d6e60124dfd
+  metadata.gz: 1be9c28022d45172119190075e2c4f340f129022
+  data.tar.gz: f00d504978967e201a795874fcdee1038cb96d03
 SHA512:
-  metadata.gz: d11e77f70106d9beaab587e7c8372dc217a9e511e7058e6d4f0a55fb4ce040b5ad50ef203a9755324ebaccc6fe919919eb04e5bb4770a290413b9990cecbfec6
-  data.tar.gz: fbea33173c9c8bdca4458c2dcb1f88e5643be169d8a54f4ed3baf593677f790d754e0f58a1c37ca527cbfeeeb9c0b19b28461afe417da509c2073543c33522ee
+  metadata.gz: 3407c5ce61d32c3576ce1fa6d16bc3249e0f74fc990b7b5dc8005c545e108cdca776e52e985354636a96ad626db2de90f71c5dd499ab29171b7b25a314cd5c26
+  data.tar.gz: b8be240f48b2962dd417c19589be7dba6a5b5460c9046deb66972507fe4655718884ad60d86ab8838c3428c946913e2e38ba5524bc1403f320ad01ee43a8e7f2

data/.travis.yml CHANGED

@@ -1,10 +1,11 @@
 language: ruby
 rvm:
-  - 2.1.2
+  - 2.2.2
+  - 2.1.6
   - 2.0.0
   - 1.9.3
   - rbx-2.2.9
-  - jruby-1.7.16
+  - jruby-1.7.19
 before_install:
   - sudo apt-get update -qq
   - sudo apt-get install -y swig

data/CHANGELOG.md CHANGED

@@ -1,5 +1,12 @@
 # Changelog
+**v0.3.0 - 17/04/15**
+* Add Words support to Decoder
+* Don't lazy initialize Pocketsphinx decoder
+* Use buffer size of 2048 as now [required](https://github.com/cmusphinx/pocketsphinx/commit/541b5dfa87ef5fffe509d7c195803fd45749db5e) by Pocketsphinx
 **v0.2.0 - 03/03/15**
 * Updated to latest Pocketsphinx API interface

data/README.md CHANGED

@@ -69,7 +69,7 @@ The `AudioFileSpeechRecognizer` decodes directly from an audio file by coordinat
 recognizer = Pocketsphinx::AudioFileSpeechRecognizer.new
 recognizer.recognize('spec/assets/audio/goforward.raw') do |speech|
-  puts speech # => "go forward ten years"
+  puts speech # => "go forward ten meters"
 end
 ```
@@ -110,9 +110,9 @@ microphone = Pocketsphinx::Microphone.new
 File.open("test.raw", "wb") do |file|
   microphone.record do
-    FFI::MemoryPointer.new(:int16, 4096) do |buffer|
+    FFI::MemoryPointer.new(:int16, 2048) do |buffer|
       50.times do
-        sample_count = microphone.read_audio(buffer, 4096)
+        sample_count = microphone.read_audio(buffer, 2048)
         file.write buffer.get_bytes(0, sample_count * 2)
         sleep 0.1
@@ -133,7 +133,27 @@ The `Decoder` class uses Pocketsphinx's libpocketsphinx to decode audio data int
 decoder = Pocketsphinx::Decoder.new(Pocketsphinx::Configuration.default)
 decoder.decode 'spec/assets/audio/goforward.raw'
-puts decoder.hypothesis # => "go forward ten years"
+puts decoder.hypothesis # => "go forward ten meters"
+```
+And split into individual words with frame data:
+```ruby
+decoder.words
+# => [
+#  #<struct Pocketsphinx::Decoder::Word word="<s>", start_frame=608, end_frame=610>,
+#  #<struct Pocketsphinx::Decoder::Word word="go", start_frame=611, end_frame=622>,
+#  #<struct Pocketsphinx::Decoder::Word word="forward", start_frame=623, end_frame=675>,
+#  #<struct Pocketsphinx::Decoder::Word word="ten", start_frame=676, end_frame=711>,
+#  #<struct Pocketsphinx::Decoder::Word word="meters", start_frame=712, end_frame=770>,
+#  #<struct Pocketsphinx::Decoder::Word word="</s>", start_frame=771, end_frame=821>
+# ]
+```
+Note: When the `Decoder` is initialized, the supplied `Configuration` is updated by Pocketsphinx with some settings from the acoustic model. To see exactly what's going on:
+```ruby
+Pocketsphinx::Decoder.new(Pocketsphinx::Configuration.default).configuration.changes
 ```

data/examples/decode_audio_file.rb CHANGED

@@ -8,4 +8,4 @@ include Pocketsphinx
 decoder = Decoder.new(Configuration.default)
 decoder.decode 'spec/assets/audio/goforward.raw'
-puts decoder.hypothesis # => "go forward ten years"
+puts decoder.hypothesis # => "go forward ten meters"

data/examples/record_audio_file.rb CHANGED

@@ -5,7 +5,7 @@ require "pocketsphinx-ruby"
 include Pocketsphinx
-MAX_SAMPLES = 4096
+MAX_SAMPLES = 2048
 RECORDING_INTERVAL = 0.1
 RECORDING_LENGTH = 5

data/lib/pocketsphinx/api/pocketsphinx.rb CHANGED

@@ -7,7 +7,12 @@ module Pocketsphinx
       typedef :pointer, :decoder
       typedef :pointer, :configuration
-      attach_function :ps_init, [:configuration], :decoder
+      # Allows expect(API::Pocketsphinx).to receive(:ps_init) in JRuby specs
+      def self.ps_init(*args)
+        ps_init_private(*args)
+      end
+      attach_function :ps_init_private, :ps_init, [:configuration], :decoder
       attach_function :ps_reinit, [:decoder, :configuration], :int
       attach_function :ps_default_search_args, [:pointer], :void
       attach_function :ps_args, [], :pointer
@@ -21,6 +26,15 @@ module Pocketsphinx
       attach_function :ps_unset_search, [:decoder, :string], :int
       attach_function :ps_get_search, [:decoder], :string
       attach_function :ps_set_search, [:decoder, :string], :int
+      typedef :pointer, :seg_iter
+      attach_function :ps_seg_iter, [:decoder, :pointer], :seg_iter
+      attach_function :ps_seg_next, [:seg_iter], :seg_iter
+      attach_function :ps_seg_word, [:seg_iter], :string
+      attach_function :ps_seg_frames, [:seg_iter, :pointer, :pointer], :void
+      attach_function :ps_seg_prob, [:seg_iter, :pointer, :pointer, :pointer], :int32
+      attach_function :ps_seg_free, [:seg_iter], :void
     end
   end
 end

data/lib/pocketsphinx/audio_file.rb CHANGED

@@ -6,7 +6,7 @@ module Pocketsphinx
     # @param [FFI::Pointer] buffer 16bit buffer of at least max_samples in size
     # @params [Fixnum] max_samples The maximum number of samples to read from the audio file
     # @return [Fixnum] Samples actually read; nil if EOF
-    def read_audio(buffer, max_samples = 4096)
+    def read_audio(buffer, max_samples = 2048)
       if file.nil?
         raise "Can't read audio: use AudioFile#start_recording to open the file first"
       end

data/lib/pocketsphinx/audio_file_speech_recognizer.rb CHANGED

@@ -1,7 +1,7 @@
 module Pocketsphinx
   # High-level class for live speech recognition from a raw audio file.
   class AudioFileSpeechRecognizer < SpeechRecognizer
-    def recognize(file_path, max_samples = 4096)
+    def recognize(file_path, max_samples = 2048)
       self.recordable = AudioFile.new(file_path)
       super(max_samples) do |speech|

data/lib/pocketsphinx/decoder.rb CHANGED

@@ -1,5 +1,5 @@
 module Pocketsphinx
-  class Decoder < Struct.new(:configuration)
+  class Decoder
     require 'delegate'
     include API::CallHelpers
@@ -13,7 +13,22 @@ module Pocketsphinx
       end
     end
+    Word = Struct.new(:word, :start_frame, :end_frame)
     attr_writer :ps_api
+    attr_accessor :configuration
+    # Initialize a Decoder
+    #
+    # Note that this initialization process actually updates the Configuration based on settings
+    # which are found in feat.params along with the acoustic model.
+    #
+    # @param [Configuration] configuration
+    # @param [FFI::Pointer] ps_decoder An optional Pocketsphinx decoder. One is initialized if not provided.
+    def initialize(configuration, ps_decoder = nil)
+      @configuration = configuration
+      init_decoder if ps_decoder.nil?
+    end
     # Reinitialize the decoder with updated configuration.
     #
@@ -108,6 +123,30 @@ module Pocketsphinx
       )
     end
+    # Get an array of words with start/end frame values (10msec/frame) for current hypothesis
+    #
+    # @return [Array] Array of words with start/end frame values (10msec/frame)
+    def words
+      mp_path_score = FFI::MemoryPointer.new(:int32, 1)
+      start_frame   = FFI::MemoryPointer.new(:int32, 1)
+      end_frame     = FFI::MemoryPointer.new(:int32, 1)
+      seg_iter = ps_api.ps_seg_iter(ps_decoder, mp_path_score)
+      words    = []
+      until seg_iter.null? do
+        ps_api.ps_seg_frames(seg_iter, start_frame, end_frame)
+        words << Pocketsphinx::Decoder::Word.new(
+          ps_api.ps_seg_word(seg_iter),
+          start_frame.get_int32(0),
+          end_frame.get_int32(0)
+        )
+        seg_iter = ps_api.ps_seg_next(seg_iter)
+      end
+      words
+    end
     # Adds new search using JSGF model.
     #
     # Convenience method to parse JSGF model from string and create a search.

data/lib/pocketsphinx/microphone.rb CHANGED

@@ -49,7 +49,7 @@ module Pocketsphinx
     # @params [Fixnum] max_samples The maximum number of samples to read from the audio device
     # @return [Fixnum] Samples actually read (could be 0 since non-blocking); nil if not
     #   recording and no more samples remaining to be read from most recent recording.
-    def read_audio(buffer, max_samples = 4096)
+    def read_audio(buffer, max_samples = 2048)
       samples = ps_api.ad_read(@ps_audio_device, buffer, max_samples)
       samples if samples >= 0
     end
@@ -60,7 +60,7 @@ module Pocketsphinx
     # we specify a delay which should fill half of the max buffer size
     #
     # @param [Fixnum] max_samples The maximum samples we tried to read from the audio device
-    def read_audio_delay(max_samples = 4096)
+    def read_audio_delay(max_samples = 2048)
       max_samples.to_f / (2 * sample_rate)
     end

data/lib/pocketsphinx/speech_recognizer.rb CHANGED

@@ -43,7 +43,7 @@ module Pocketsphinx
     # Recognize speech and yield hypotheses in infinite loop
     #
     # @param [Fixnum] max_samples Number of samples to process at a time
-    def recognize(max_samples = 4096, &b)
+    def recognize(max_samples = 2048, &b)
       unless ALGORITHMS.include?(algorithm)
         raise NotImplementedError, "Unknown speech recognition algorithm: #{algorithm}"
       end

data/lib/pocketsphinx/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Pocketsphinx
-  VERSION = "0.2.0"
+  VERSION = "0.3.0"
 end

data/spec/assets/audio/hello.wav ADDED

Binary file

data/spec/configuration_spec.rb CHANGED

@@ -65,7 +65,7 @@ describe Pocketsphinx::Configuration do
   describe '#setting_names' do
     it 'contains the names of all possible system settings' do
-      expect(subject.setting_names.count).to eq(112)
+      expect(subject.setting_names.count).to eq(114)
     end
   end
@@ -84,7 +84,7 @@ describe Pocketsphinx::Configuration do
     it 'gives details for all settings when no name is specified' do
       details = subject.details
-      expect(details.count).to eq(112)
+      expect(details.count).to eq(114)
       expect(details.first).to eq({
         name: "agc",
         type: :string,

data/spec/decoder_spec.rb CHANGED

@@ -1,7 +1,7 @@
 require 'spec_helper'
 describe Pocketsphinx::Decoder do
-  subject { Pocketsphinx::Decoder.new(configuration) }
+  subject { Pocketsphinx::Decoder.new(configuration, ps_decoder) }
   let(:ps_api) { subject.ps_api }
   let(:ps_decoder) { double }
   let(:configuration) { Pocketsphinx::Configuration.default }
@@ -11,6 +11,17 @@ describe Pocketsphinx::Decoder do
     allow(ps_api).to receive(:ps_init).and_return(ps_decoder)
   end
+  describe 'initialization' do
+    it 'initializes the underlying Pocketsphinx decoder when one is not provided' do
+      expect(Pocketsphinx::API::Pocketsphinx)
+        .to receive(:ps_init)
+        .with(configuration.ps_config)
+        .and_return(ps_decoder)
+      Pocketsphinx::Decoder.new(configuration)
+    end
+  end
   describe '#reconfigure' do
     it 'calls libpocketsphinx and the configuration post initialize hook' do
       expect(ps_api)
@@ -53,24 +64,24 @@ describe Pocketsphinx::Decoder do
   describe '#process_raw' do
     it 'calls libpocketsphinx' do
-      FFI::MemoryPointer.new(:int16, 4096) do |buffer|
+      FFI::MemoryPointer.new(:int16, 2048) do |buffer|
         expect(ps_api)
           .to receive(:ps_process_raw)
-          .with(subject.ps_decoder, buffer, 4096, 0, 0)
+          .with(subject.ps_decoder, buffer, 2048, 0, 0)
           .and_return(0)
-        subject.process_raw(buffer, 4096, false, false)
+        subject.process_raw(buffer, 2048, false, false)
       end
     end
     it 'raises an exception on error' do
-      FFI::MemoryPointer.new(:int16, 4096) do |buffer|
+      FFI::MemoryPointer.new(:int16, 2048) do |buffer|
         expect(ps_api)
           .to receive(:ps_process_raw)
-          .with(subject.ps_decoder, buffer, 4096, 0, 0)
+          .with(subject.ps_decoder, buffer, 2048, 0, 0)
           .and_return(-1)
-        expect { subject.process_raw(buffer, 4096, false, false) }
+        expect { subject.process_raw(buffer, 2048, false, false) }
           .to raise_exception "Decoder#process_raw failed with error code -1"
       end
     end
@@ -148,6 +159,35 @@ describe Pocketsphinx::Decoder do
     end
   end
+  describe '#words' do
+    let(:iterator) { FFI::MemoryPointer.from_string("") }
+    it 'calls libpocketsphinx' do
+      expect(ps_api).to receive(:ps_seg_iter).ordered.and_return(iterator)
+      expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame|
+        start_frame.put_int16(0, 10)
+        end_frame.put_int16(0, 20)
+      end
+      expect(ps_api).to receive(:ps_seg_word).ordered.and_return("one")
+      expect(ps_api).to receive(:ps_seg_next).ordered.and_return(iterator)
+      expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame|
+        start_frame.put_int16(0, 30)
+        end_frame.put_int16(0, 40)
+      end
+      expect(ps_api).to receive(:ps_seg_word).ordered.and_return("two")
+      expect(ps_api).to receive(:ps_seg_next).ordered.and_return(FFI::Pointer::NULL)
+      words = subject.words
+      expect(words[0]).to eq(Pocketsphinx::Decoder::Word.new("one", 10, 20))
+      expect(words[1]).to eq(Pocketsphinx::Decoder::Word.new("two", 30, 40))
+    end
+  end
   describe '#set_jsgf_string' do
     it 'calls libpocketsphinx' do
       expect(ps_api)

data/spec/integration/decoder_spec.rb CHANGED

@@ -5,22 +5,44 @@ describe Pocketsphinx::Decoder do
   let(:configuration) { @configuration }
   # Share decoder across all examples for speed
-  before :all do
+  before do
     @configuration = Pocketsphinx::Configuration.default
     @decoder = Pocketsphinx::Decoder.new(@configuration)
   end
+  it 'reads cmninit configuration values from default acoustic model feat.params' do
+    expect(configuration.details('cmninit')[:default]).to eq("8.0")
+    expect(configuration.details('cmninit')[:value]).to eq("40,3,-1")
+  end
   describe '#decode' do
     it 'correctly decodes the speech in goforward.raw' do
-      @decoder.ps_api = nil
       subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
       expect(subject.hypothesis).to eq("go forward ten meters")
     end
+    # FIXME: This test illustrates a current issue discussed in:
+    #        https://github.com/watsonbox/pocketsphinx-ruby/issues/10
+    it 'incorrectly decodes the speech in hello.wav on first attempt' do
+      hypotheses = (1..2).map do
+        subject.decode File.open('spec/assets/audio/hello.wav', 'rb')
+        subject.hypothesis
+      end
+      expect(hypotheses).to eq(['oh', 'hello'])
+    end
     it 'accepts a file path as well as a stream' do
       subject.decode 'spec/assets/audio/goforward.raw'
       expect(subject.hypothesis).to eq("go forward ten meters")
     end
+    it 'reports words with start/end frame values' do
+      subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
+      expect(subject.words.map(&:word)).to eq(["<s>", "go", "forward", "ten", "meters", "</s>"])
+      expect(subject.words.map(&:start_frame)).to eq([0, 46, 64, 117, 153, 212])
+      expect(subject.words.map(&:end_frame)).to eq([45, 63, 116, 152, 211, 260])
+    end
   end
 end

data/spec/integration/default_recognition_spec.rb CHANGED

@@ -14,7 +14,7 @@ describe 'speech recognition with default configuration' do
   describe '#recognize' do
     it 'should decode speech in raw audio' do
-      expect { |b| subject.recognize('spec/assets/audio/goforward.raw', 4096, &b) }.
+      expect { |b| subject.recognize('spec/assets/audio/goforward.raw', 2048, &b) }.
         to yield_with_args("go forward ten meters")
     end
   end

data/spec/integration/grammar_recognition_spec.rb CHANGED

@@ -18,7 +18,7 @@ describe 'speech recognition with a grammar' do
   describe '#recognize' do
     it 'should decode speech in raw audio' do
-      expect { |b| subject.recognize(4096, &b) }.to yield_with_args("go forward ten meters")
+      expect { |b| subject.recognize(2048, &b) }.to yield_with_args("go forward ten meters")
     end
   end
 end

data/spec/integration/keyword_recognition_spec.rb CHANGED

@@ -19,7 +19,7 @@ describe 'keyword spotting' do
   describe '#recognize' do
     it 'should decode speech in raw audio' do
-      expect { |b| subject.recognize(4096, &b) }.to yield_with_args('forward')
+      expect { |b| subject.recognize(2048, &b) }.to yield_with_args('forward')
     end
   end
 end

data/spec/microphone_spec.rb CHANGED

@@ -72,16 +72,16 @@ describe Pocketsphinx::Microphone do
     it 'calls libsphinxad' do
       expect(ps_api)
         .to receive(:ad_read)
-        .with(subject.ps_audio_device, :buffer, 4096)
+        .with(subject.ps_audio_device, :buffer, 2048)
         .and_return(0)
-      subject.read_audio(:buffer, 4096)
+      subject.read_audio(:buffer, 2048)
     end
   end
   describe '#read_audio_delay' do
-    it 'should be 0.128 seconds for a max_samples of 4096 and sample rate of 16kHz' do
-      expect(subject.read_audio_delay(4096)).to eq(0.128)
+    it 'should be 0.064 seconds for a max_samples of 2048 and sample rate of 16kHz' do
+      expect(subject.read_audio_delay(2048)).to eq(0.064)
     end
   end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pocketsphinx-ruby
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Howard Wilson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-03-03 00:00:00.000000000 Z
+date: 2015-04-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi
@@ -122,6 +122,7 @@ files:
 - lib/pocketsphinx/version.rb
 - pocketsphinx-ruby.gemspec
 - spec/assets/audio/goforward.raw
+- spec/assets/audio/hello.wav
 - spec/assets/grammars/goforward.gram
 - spec/assets/grammars/invalid.gram
 - spec/assets/grammars/sentences.gram
@@ -162,6 +163,7 @@ specification_version: 4
 summary: Ruby speech recognition with Pocketsphinx
 test_files:
 - spec/assets/audio/goforward.raw
+- spec/assets/audio/hello.wav
 - spec/assets/grammars/goforward.gram
 - spec/assets/grammars/invalid.gram
 - spec/assets/grammars/sentences.gram