pocketsphinx-ruby 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b38998dd59c577300db4b63bfda52e037c49409
4
- data.tar.gz: 82dd10ee0f739d6459013513ae986d6e60124dfd
3
+ metadata.gz: 1be9c28022d45172119190075e2c4f340f129022
4
+ data.tar.gz: f00d504978967e201a795874fcdee1038cb96d03
5
5
  SHA512:
6
- metadata.gz: d11e77f70106d9beaab587e7c8372dc217a9e511e7058e6d4f0a55fb4ce040b5ad50ef203a9755324ebaccc6fe919919eb04e5bb4770a290413b9990cecbfec6
7
- data.tar.gz: fbea33173c9c8bdca4458c2dcb1f88e5643be169d8a54f4ed3baf593677f790d754e0f58a1c37ca527cbfeeeb9c0b19b28461afe417da509c2073543c33522ee
6
+ metadata.gz: 3407c5ce61d32c3576ce1fa6d16bc3249e0f74fc990b7b5dc8005c545e108cdca776e52e985354636a96ad626db2de90f71c5dd499ab29171b7b25a314cd5c26
7
+ data.tar.gz: b8be240f48b2962dd417c19589be7dba6a5b5460c9046deb66972507fe4655718884ad60d86ab8838c3428c946913e2e38ba5524bc1403f320ad01ee43a8e7f2
@@ -1,10 +1,11 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.1.2
3
+ - 2.2.2
4
+ - 2.1.6
4
5
  - 2.0.0
5
6
  - 1.9.3
6
7
  - rbx-2.2.9
7
- - jruby-1.7.16
8
+ - jruby-1.7.19
8
9
  before_install:
9
10
  - sudo apt-get update -qq
10
11
  - sudo apt-get install -y swig
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ **v0.3.0 - 17/04/15**
4
+
5
+ * Add Words support to Decoder
6
+ * Don't lazy initialize Pocketsphinx decoder
7
+ * Use a buffer size of 2048, as now [required](https://github.com/cmusphinx/pocketsphinx/commit/541b5dfa87ef5fffe509d7c195803fd45749db5e) by Pocketsphinx
8
+
9
+
3
10
  **v0.2.0 - 03/03/15**
4
11
 
5
12
  * Updated to latest Pocketsphinx API interface
data/README.md CHANGED
@@ -69,7 +69,7 @@ The `AudioFileSpeechRecognizer` decodes directly from an audio file by coordinat
69
69
  recognizer = Pocketsphinx::AudioFileSpeechRecognizer.new
70
70
 
71
71
  recognizer.recognize('spec/assets/audio/goforward.raw') do |speech|
72
- puts speech # => "go forward ten years"
72
+ puts speech # => "go forward ten meters"
73
73
  end
74
74
  ```
75
75
 
@@ -110,9 +110,9 @@ microphone = Pocketsphinx::Microphone.new
110
110
 
111
111
  File.open("test.raw", "wb") do |file|
112
112
  microphone.record do
113
- FFI::MemoryPointer.new(:int16, 4096) do |buffer|
113
+ FFI::MemoryPointer.new(:int16, 2048) do |buffer|
114
114
  50.times do
115
- sample_count = microphone.read_audio(buffer, 4096)
115
+ sample_count = microphone.read_audio(buffer, 2048)
116
116
  file.write buffer.get_bytes(0, sample_count * 2)
117
117
 
118
118
  sleep 0.1
@@ -133,7 +133,27 @@ The `Decoder` class uses Pocketsphinx's libpocketsphinx to decode audio data int
133
133
  decoder = Pocketsphinx::Decoder.new(Pocketsphinx::Configuration.default)
134
134
  decoder.decode 'spec/assets/audio/goforward.raw'
135
135
 
136
- puts decoder.hypothesis # => "go forward ten years"
136
+ puts decoder.hypothesis # => "go forward ten meters"
137
+ ```
138
+
139
+ And split into individual words with frame data:
140
+
141
+ ```ruby
142
+ decoder.words
143
+ # => [
144
+ # #<struct Pocketsphinx::Decoder::Word word="<s>", start_frame=608, end_frame=610>,
145
+ # #<struct Pocketsphinx::Decoder::Word word="go", start_frame=611, end_frame=622>,
146
+ # #<struct Pocketsphinx::Decoder::Word word="forward", start_frame=623, end_frame=675>,
147
+ # #<struct Pocketsphinx::Decoder::Word word="ten", start_frame=676, end_frame=711>,
148
+ # #<struct Pocketsphinx::Decoder::Word word="meters", start_frame=712, end_frame=770>,
149
+ # #<struct Pocketsphinx::Decoder::Word word="</s>", start_frame=771, end_frame=821>
150
+ # ]
151
+ ```
152
+
153
+ Note: When the `Decoder` is initialized, the supplied `Configuration` is updated by Pocketsphinx with some settings from the acoustic model. To see exactly what's going on:
154
+
155
+ ```ruby
156
+ Pocketsphinx::Decoder.new(Pocketsphinx::Configuration.default).configuration.changes
137
157
  ```
138
158
 
139
159
 
@@ -8,4 +8,4 @@ include Pocketsphinx
8
8
  decoder = Decoder.new(Configuration.default)
9
9
  decoder.decode 'spec/assets/audio/goforward.raw'
10
10
 
11
- puts decoder.hypothesis # => "go forward ten years"
11
+ puts decoder.hypothesis # => "go forward ten meters"
@@ -5,7 +5,7 @@ require "pocketsphinx-ruby"
5
5
 
6
6
  include Pocketsphinx
7
7
 
8
- MAX_SAMPLES = 4096
8
+ MAX_SAMPLES = 2048
9
9
  RECORDING_INTERVAL = 0.1
10
10
  RECORDING_LENGTH = 5
11
11
 
@@ -7,7 +7,12 @@ module Pocketsphinx
7
7
  typedef :pointer, :decoder
8
8
  typedef :pointer, :configuration
9
9
 
10
- attach_function :ps_init, [:configuration], :decoder
10
+ # Allows expect(API::Pocketsphinx).to receive(:ps_init) in JRuby specs
11
+ def self.ps_init(*args)
12
+ ps_init_private(*args)
13
+ end
14
+
15
+ attach_function :ps_init_private, :ps_init, [:configuration], :decoder
11
16
  attach_function :ps_reinit, [:decoder, :configuration], :int
12
17
  attach_function :ps_default_search_args, [:pointer], :void
13
18
  attach_function :ps_args, [], :pointer
@@ -21,6 +26,15 @@ module Pocketsphinx
21
26
  attach_function :ps_unset_search, [:decoder, :string], :int
22
27
  attach_function :ps_get_search, [:decoder], :string
23
28
  attach_function :ps_set_search, [:decoder, :string], :int
29
+
30
+ typedef :pointer, :seg_iter
31
+
32
+ attach_function :ps_seg_iter, [:decoder, :pointer], :seg_iter
33
+ attach_function :ps_seg_next, [:seg_iter], :seg_iter
34
+ attach_function :ps_seg_word, [:seg_iter], :string
35
+ attach_function :ps_seg_frames, [:seg_iter, :pointer, :pointer], :void
36
+ attach_function :ps_seg_prob, [:seg_iter, :pointer, :pointer, :pointer], :int32
37
+ attach_function :ps_seg_free, [:seg_iter], :void
24
38
  end
25
39
  end
26
40
  end
@@ -6,7 +6,7 @@ module Pocketsphinx
6
6
  # @param [FFI::Pointer] buffer 16bit buffer of at least max_samples in size
7
7
  # @param [Fixnum] max_samples The maximum number of samples to read from the audio file
8
8
  # @return [Fixnum] Samples actually read; nil if EOF
9
- def read_audio(buffer, max_samples = 4096)
9
+ def read_audio(buffer, max_samples = 2048)
10
10
  if file.nil?
11
11
  raise "Can't read audio: use AudioFile#start_recording to open the file first"
12
12
  end
@@ -1,7 +1,7 @@
1
1
  module Pocketsphinx
2
2
  # High-level class for live speech recognition from a raw audio file.
3
3
  class AudioFileSpeechRecognizer < SpeechRecognizer
4
- def recognize(file_path, max_samples = 4096)
4
+ def recognize(file_path, max_samples = 2048)
5
5
  self.recordable = AudioFile.new(file_path)
6
6
 
7
7
  super(max_samples) do |speech|
@@ -1,5 +1,5 @@
1
1
  module Pocketsphinx
2
- class Decoder < Struct.new(:configuration)
2
+ class Decoder
3
3
  require 'delegate'
4
4
 
5
5
  include API::CallHelpers
@@ -13,7 +13,22 @@ module Pocketsphinx
13
13
  end
14
14
  end
15
15
 
16
+ Word = Struct.new(:word, :start_frame, :end_frame)
17
+
16
18
  attr_writer :ps_api
19
+ attr_accessor :configuration
20
+
21
+ # Initialize a Decoder
22
+ #
23
+ # Note that this initialization process actually updates the Configuration based on settings
24
+ # which are found in feat.params along with the acoustic model.
25
+ #
26
+ # @param [Configuration] configuration
27
+ # @param [FFI::Pointer] ps_decoder An optional Pocketsphinx decoder. One is initialized if not provided.
28
+ def initialize(configuration, ps_decoder = nil)
29
+ @configuration = configuration
30
+ init_decoder if ps_decoder.nil?
31
+ end
17
32
 
18
33
  # Reinitialize the decoder with updated configuration.
19
34
  #
@@ -108,6 +123,30 @@ module Pocketsphinx
108
123
  )
109
124
  end
110
125
 
126
+ # Get an array of words with start/end frame values (10msec/frame) for current hypothesis
127
+ #
128
+ # @return [Array] Array of words with start/end frame values (10msec/frame)
129
+ def words
130
+ mp_path_score = FFI::MemoryPointer.new(:int32, 1)
131
+ start_frame = FFI::MemoryPointer.new(:int32, 1)
132
+ end_frame = FFI::MemoryPointer.new(:int32, 1)
133
+
134
+ seg_iter = ps_api.ps_seg_iter(ps_decoder, mp_path_score)
135
+ words = []
136
+
137
+ until seg_iter.null? do
138
+ ps_api.ps_seg_frames(seg_iter, start_frame, end_frame)
139
+ words << Pocketsphinx::Decoder::Word.new(
140
+ ps_api.ps_seg_word(seg_iter),
141
+ start_frame.get_int32(0),
142
+ end_frame.get_int32(0)
143
+ )
144
+ seg_iter = ps_api.ps_seg_next(seg_iter)
145
+ end
146
+
147
+ words
148
+ end
149
+
111
150
  # Adds new search using JSGF model.
112
151
  #
113
152
  # Convenience method to parse JSGF model from string and create a search.
@@ -49,7 +49,7 @@ module Pocketsphinx
49
49
  # @param [Fixnum] max_samples The maximum number of samples to read from the audio device
50
50
  # @return [Fixnum] Samples actually read (could be 0 since non-blocking); nil if not
51
51
  # recording and no more samples remaining to be read from most recent recording.
52
- def read_audio(buffer, max_samples = 4096)
52
+ def read_audio(buffer, max_samples = 2048)
53
53
  samples = ps_api.ad_read(@ps_audio_device, buffer, max_samples)
54
54
  samples if samples >= 0
55
55
  end
@@ -60,7 +60,7 @@ module Pocketsphinx
60
60
  # we specify a delay which should fill half of the max buffer size
61
61
  #
62
62
  # @param [Fixnum] max_samples The maximum samples we tried to read from the audio device
63
- def read_audio_delay(max_samples = 4096)
63
+ def read_audio_delay(max_samples = 2048)
64
64
  max_samples.to_f / (2 * sample_rate)
65
65
  end
66
66
 
@@ -43,7 +43,7 @@ module Pocketsphinx
43
43
  # Recognize speech and yield hypotheses in infinite loop
44
44
  #
45
45
  # @param [Fixnum] max_samples Number of samples to process at a time
46
- def recognize(max_samples = 4096, &b)
46
+ def recognize(max_samples = 2048, &b)
47
47
  unless ALGORITHMS.include?(algorithm)
48
48
  raise NotImplementedError, "Unknown speech recognition algorithm: #{algorithm}"
49
49
  end
@@ -1,3 +1,3 @@
1
1
  module Pocketsphinx
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -65,7 +65,7 @@ describe Pocketsphinx::Configuration do
65
65
 
66
66
  describe '#setting_names' do
67
67
  it 'contains the names of all possible system settings' do
68
- expect(subject.setting_names.count).to eq(112)
68
+ expect(subject.setting_names.count).to eq(114)
69
69
  end
70
70
  end
71
71
 
@@ -84,7 +84,7 @@ describe Pocketsphinx::Configuration do
84
84
  it 'gives details for all settings when no name is specified' do
85
85
  details = subject.details
86
86
 
87
- expect(details.count).to eq(112)
87
+ expect(details.count).to eq(114)
88
88
  expect(details.first).to eq({
89
89
  name: "agc",
90
90
  type: :string,
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Pocketsphinx::Decoder do
4
- subject { Pocketsphinx::Decoder.new(configuration) }
4
+ subject { Pocketsphinx::Decoder.new(configuration, ps_decoder) }
5
5
  let(:ps_api) { subject.ps_api }
6
6
  let(:ps_decoder) { double }
7
7
  let(:configuration) { Pocketsphinx::Configuration.default }
@@ -11,6 +11,17 @@ describe Pocketsphinx::Decoder do
11
11
  allow(ps_api).to receive(:ps_init).and_return(ps_decoder)
12
12
  end
13
13
 
14
+ describe 'initialization' do
15
+ it 'initializes the underlying Pocketsphinx decoder when one is not provided' do
16
+ expect(Pocketsphinx::API::Pocketsphinx)
17
+ .to receive(:ps_init)
18
+ .with(configuration.ps_config)
19
+ .and_return(ps_decoder)
20
+
21
+ Pocketsphinx::Decoder.new(configuration)
22
+ end
23
+ end
24
+
14
25
  describe '#reconfigure' do
15
26
  it 'calls libpocketsphinx and the configuration post initialize hook' do
16
27
  expect(ps_api)
@@ -53,24 +64,24 @@ describe Pocketsphinx::Decoder do
53
64
 
54
65
  describe '#process_raw' do
55
66
  it 'calls libpocketsphinx' do
56
- FFI::MemoryPointer.new(:int16, 4096) do |buffer|
67
+ FFI::MemoryPointer.new(:int16, 2048) do |buffer|
57
68
  expect(ps_api)
58
69
  .to receive(:ps_process_raw)
59
- .with(subject.ps_decoder, buffer, 4096, 0, 0)
70
+ .with(subject.ps_decoder, buffer, 2048, 0, 0)
60
71
  .and_return(0)
61
72
 
62
- subject.process_raw(buffer, 4096, false, false)
73
+ subject.process_raw(buffer, 2048, false, false)
63
74
  end
64
75
  end
65
76
 
66
77
  it 'raises an exception on error' do
67
- FFI::MemoryPointer.new(:int16, 4096) do |buffer|
78
+ FFI::MemoryPointer.new(:int16, 2048) do |buffer|
68
79
  expect(ps_api)
69
80
  .to receive(:ps_process_raw)
70
- .with(subject.ps_decoder, buffer, 4096, 0, 0)
81
+ .with(subject.ps_decoder, buffer, 2048, 0, 0)
71
82
  .and_return(-1)
72
83
 
73
- expect { subject.process_raw(buffer, 4096, false, false) }
84
+ expect { subject.process_raw(buffer, 2048, false, false) }
74
85
  .to raise_exception "Decoder#process_raw failed with error code -1"
75
86
  end
76
87
  end
@@ -148,6 +159,35 @@ describe Pocketsphinx::Decoder do
148
159
  end
149
160
  end
150
161
 
162
+ describe '#words' do
163
+ let(:iterator) { FFI::MemoryPointer.from_string("") }
164
+
165
+ it 'calls libpocketsphinx' do
166
+ expect(ps_api).to receive(:ps_seg_iter).ordered.and_return(iterator)
167
+
168
+ expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame|
169
+ start_frame.put_int16(0, 10)
170
+ end_frame.put_int16(0, 20)
171
+ end
172
+
173
+ expect(ps_api).to receive(:ps_seg_word).ordered.and_return("one")
174
+ expect(ps_api).to receive(:ps_seg_next).ordered.and_return(iterator)
175
+
176
+ expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame|
177
+ start_frame.put_int16(0, 30)
178
+ end_frame.put_int16(0, 40)
179
+ end
180
+
181
+ expect(ps_api).to receive(:ps_seg_word).ordered.and_return("two")
182
+ expect(ps_api).to receive(:ps_seg_next).ordered.and_return(FFI::Pointer::NULL)
183
+
184
+ words = subject.words
185
+
186
+ expect(words[0]).to eq(Pocketsphinx::Decoder::Word.new("one", 10, 20))
187
+ expect(words[1]).to eq(Pocketsphinx::Decoder::Word.new("two", 30, 40))
188
+ end
189
+ end
190
+
151
191
  describe '#set_jsgf_string' do
152
192
  it 'calls libpocketsphinx' do
153
193
  expect(ps_api)
@@ -5,22 +5,44 @@ describe Pocketsphinx::Decoder do
5
5
  let(:configuration) { @configuration }
6
6
 
7
7
  # Share decoder across all examples for speed
8
- before :all do
8
+ before do
9
9
  @configuration = Pocketsphinx::Configuration.default
10
10
  @decoder = Pocketsphinx::Decoder.new(@configuration)
11
11
  end
12
12
 
13
+ it 'reads cmninit configuration values from default acoustic model feat.params' do
14
+ expect(configuration.details('cmninit')[:default]).to eq("8.0")
15
+ expect(configuration.details('cmninit')[:value]).to eq("40,3,-1")
16
+ end
17
+
13
18
  describe '#decode' do
14
19
  it 'correctly decodes the speech in goforward.raw' do
15
- @decoder.ps_api = nil
16
20
  subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
17
-
18
21
  expect(subject.hypothesis).to eq("go forward ten meters")
19
22
  end
20
23
 
24
+ # FIXME: This test illustrates a current issue discussed in:
25
+ # https://github.com/watsonbox/pocketsphinx-ruby/issues/10
26
+ it 'incorrectly decodes the speech in hello.wav on first attempt' do
27
+ hypotheses = (1..2).map do
28
+ subject.decode File.open('spec/assets/audio/hello.wav', 'rb')
29
+ subject.hypothesis
30
+ end
31
+
32
+ expect(hypotheses).to eq(['oh', 'hello'])
33
+ end
34
+
21
35
  it 'accepts a file path as well as a stream' do
22
36
  subject.decode 'spec/assets/audio/goforward.raw'
23
37
  expect(subject.hypothesis).to eq("go forward ten meters")
24
38
  end
39
+
40
+ it 'reports words with start/end frame values' do
41
+ subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
42
+
43
+ expect(subject.words.map(&:word)).to eq(["<s>", "go", "forward", "ten", "meters", "</s>"])
44
+ expect(subject.words.map(&:start_frame)).to eq([0, 46, 64, 117, 153, 212])
45
+ expect(subject.words.map(&:end_frame)).to eq([45, 63, 116, 152, 211, 260])
46
+ end
25
47
  end
26
48
  end
@@ -14,7 +14,7 @@ describe 'speech recognition with default configuration' do
14
14
 
15
15
  describe '#recognize' do
16
16
  it 'should decode speech in raw audio' do
17
- expect { |b| subject.recognize('spec/assets/audio/goforward.raw', 4096, &b) }.
17
+ expect { |b| subject.recognize('spec/assets/audio/goforward.raw', 2048, &b) }.
18
18
  to yield_with_args("go forward ten meters")
19
19
  end
20
20
  end
@@ -18,7 +18,7 @@ describe 'speech recognition with a grammar' do
18
18
 
19
19
  describe '#recognize' do
20
20
  it 'should decode speech in raw audio' do
21
- expect { |b| subject.recognize(4096, &b) }.to yield_with_args("go forward ten meters")
21
+ expect { |b| subject.recognize(2048, &b) }.to yield_with_args("go forward ten meters")
22
22
  end
23
23
  end
24
24
  end
@@ -19,7 +19,7 @@ describe 'keyword spotting' do
19
19
 
20
20
  describe '#recognize' do
21
21
  it 'should decode speech in raw audio' do
22
- expect { |b| subject.recognize(4096, &b) }.to yield_with_args('forward')
22
+ expect { |b| subject.recognize(2048, &b) }.to yield_with_args('forward')
23
23
  end
24
24
  end
25
25
  end
@@ -72,16 +72,16 @@ describe Pocketsphinx::Microphone do
72
72
  it 'calls libsphinxad' do
73
73
  expect(ps_api)
74
74
  .to receive(:ad_read)
75
- .with(subject.ps_audio_device, :buffer, 4096)
75
+ .with(subject.ps_audio_device, :buffer, 2048)
76
76
  .and_return(0)
77
77
 
78
- subject.read_audio(:buffer, 4096)
78
+ subject.read_audio(:buffer, 2048)
79
79
  end
80
80
  end
81
81
 
82
82
  describe '#read_audio_delay' do
83
- it 'should be 0.128 seconds for a max_samples of 4096 and sample rate of 16kHz' do
84
- expect(subject.read_audio_delay(4096)).to eq(0.128)
83
+ it 'should be 0.064 seconds for a max_samples of 2048 and sample rate of 16kHz' do
84
+ expect(subject.read_audio_delay(2048)).to eq(0.064)
85
85
  end
86
86
  end
87
87
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pocketsphinx-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Howard Wilson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-03 00:00:00.000000000 Z
11
+ date: 2015-04-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -122,6 +122,7 @@ files:
122
122
  - lib/pocketsphinx/version.rb
123
123
  - pocketsphinx-ruby.gemspec
124
124
  - spec/assets/audio/goforward.raw
125
+ - spec/assets/audio/hello.wav
125
126
  - spec/assets/grammars/goforward.gram
126
127
  - spec/assets/grammars/invalid.gram
127
128
  - spec/assets/grammars/sentences.gram
@@ -162,6 +163,7 @@ specification_version: 4
162
163
  summary: Ruby speech recognition with Pocketsphinx
163
164
  test_files:
164
165
  - spec/assets/audio/goforward.raw
166
+ - spec/assets/audio/hello.wav
165
167
  - spec/assets/grammars/goforward.gram
166
168
  - spec/assets/grammars/invalid.gram
167
169
  - spec/assets/grammars/sentences.gram