pocketsphinx-ruby 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b38998dd59c577300db4b63bfda52e037c49409
4
- data.tar.gz: 82dd10ee0f739d6459013513ae986d6e60124dfd
3
+ metadata.gz: 1be9c28022d45172119190075e2c4f340f129022
4
+ data.tar.gz: f00d504978967e201a795874fcdee1038cb96d03
5
5
  SHA512:
6
- metadata.gz: d11e77f70106d9beaab587e7c8372dc217a9e511e7058e6d4f0a55fb4ce040b5ad50ef203a9755324ebaccc6fe919919eb04e5bb4770a290413b9990cecbfec6
7
- data.tar.gz: fbea33173c9c8bdca4458c2dcb1f88e5643be169d8a54f4ed3baf593677f790d754e0f58a1c37ca527cbfeeeb9c0b19b28461afe417da509c2073543c33522ee
6
+ metadata.gz: 3407c5ce61d32c3576ce1fa6d16bc3249e0f74fc990b7b5dc8005c545e108cdca776e52e985354636a96ad626db2de90f71c5dd499ab29171b7b25a314cd5c26
7
+ data.tar.gz: b8be240f48b2962dd417c19589be7dba6a5b5460c9046deb66972507fe4655718884ad60d86ab8838c3428c946913e2e38ba5524bc1403f320ad01ee43a8e7f2
@@ -1,10 +1,11 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.1.2
3
+ - 2.2.2
4
+ - 2.1.6
4
5
  - 2.0.0
5
6
  - 1.9.3
6
7
  - rbx-2.2.9
7
- - jruby-1.7.16
8
+ - jruby-1.7.19
8
9
  before_install:
9
10
  - sudo apt-get update -qq
10
11
  - sudo apt-get install -y swig
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ **v0.3.0 - 17/04/15**
4
+
5
+ * Add Words support to Decoder
6
+ * Don't lazy initialize Pocketsphinx decoder
7
+ * Use a buffer size of 2048, as now [required](https://github.com/cmusphinx/pocketsphinx/commit/541b5dfa87ef5fffe509d7c195803fd45749db5e) by Pocketsphinx
8
+
9
+
3
10
  **v0.2.0 - 03/03/15**
4
11
 
5
12
  * Updated to latest Pocketsphinx API interface
data/README.md CHANGED
@@ -69,7 +69,7 @@ The `AudioFileSpeechRecognizer` decodes directly from an audio file by coordinat
69
69
  recognizer = Pocketsphinx::AudioFileSpeechRecognizer.new
70
70
 
71
71
  recognizer.recognize('spec/assets/audio/goforward.raw') do |speech|
72
- puts speech # => "go forward ten years"
72
+ puts speech # => "go forward ten meters"
73
73
  end
74
74
  ```
75
75
 
@@ -110,9 +110,9 @@ microphone = Pocketsphinx::Microphone.new
110
110
 
111
111
  File.open("test.raw", "wb") do |file|
112
112
  microphone.record do
113
- FFI::MemoryPointer.new(:int16, 4096) do |buffer|
113
+ FFI::MemoryPointer.new(:int16, 2048) do |buffer|
114
114
  50.times do
115
- sample_count = microphone.read_audio(buffer, 4096)
115
+ sample_count = microphone.read_audio(buffer, 2048)
116
116
  file.write buffer.get_bytes(0, sample_count * 2)
117
117
 
118
118
  sleep 0.1
@@ -133,7 +133,27 @@ The `Decoder` class uses Pocketsphinx's libpocketsphinx to decode audio data int
133
133
  decoder = Pocketsphinx::Decoder.new(Pocketsphinx::Configuration.default)
134
134
  decoder.decode 'spec/assets/audio/goforward.raw'
135
135
 
136
- puts decoder.hypothesis # => "go forward ten years"
136
+ puts decoder.hypothesis # => "go forward ten meters"
137
+ ```
138
+
139
+ The hypothesis can also be split into individual words with frame data:
140
+
141
+ ```ruby
142
+ decoder.words
143
+ # => [
144
+ # #<struct Pocketsphinx::Decoder::Word word="<s>", start_frame=608, end_frame=610>,
145
+ # #<struct Pocketsphinx::Decoder::Word word="go", start_frame=611, end_frame=622>,
146
+ # #<struct Pocketsphinx::Decoder::Word word="forward", start_frame=623, end_frame=675>,
147
+ # #<struct Pocketsphinx::Decoder::Word word="ten", start_frame=676, end_frame=711>,
148
+ # #<struct Pocketsphinx::Decoder::Word word="meters", start_frame=712, end_frame=770>,
149
+ # #<struct Pocketsphinx::Decoder::Word word="</s>", start_frame=771, end_frame=821>
150
+ # ]
151
+ ```
152
+
153
+ Note: When the `Decoder` is initialized, the supplied `Configuration` is updated by Pocketsphinx with some settings from the acoustic model. To see exactly what's going on:
154
+
155
+ ```ruby
156
+ Pocketsphinx::Decoder.new(Pocketsphinx::Configuration.default).configuration.changes
137
157
  ```
138
158
 
139
159
 
@@ -8,4 +8,4 @@ include Pocketsphinx
8
8
  decoder = Decoder.new(Configuration.default)
9
9
  decoder.decode 'spec/assets/audio/goforward.raw'
10
10
 
11
- puts decoder.hypothesis # => "go forward ten years"
11
+ puts decoder.hypothesis # => "go forward ten meters"
@@ -5,7 +5,7 @@ require "pocketsphinx-ruby"
5
5
 
6
6
  include Pocketsphinx
7
7
 
8
- MAX_SAMPLES = 4096
8
+ MAX_SAMPLES = 2048
9
9
  RECORDING_INTERVAL = 0.1
10
10
  RECORDING_LENGTH = 5
11
11
 
@@ -7,7 +7,12 @@ module Pocketsphinx
7
7
  typedef :pointer, :decoder
8
8
  typedef :pointer, :configuration
9
9
 
10
- attach_function :ps_init, [:configuration], :decoder
10
+ # Allows expect(API::Pocketsphinx).to receive(:ps_init) in JRuby specs
11
+ def self.ps_init(*args)
12
+ ps_init_private(*args)
13
+ end
14
+
15
+ attach_function :ps_init_private, :ps_init, [:configuration], :decoder
11
16
  attach_function :ps_reinit, [:decoder, :configuration], :int
12
17
  attach_function :ps_default_search_args, [:pointer], :void
13
18
  attach_function :ps_args, [], :pointer
@@ -21,6 +26,15 @@ module Pocketsphinx
21
26
  attach_function :ps_unset_search, [:decoder, :string], :int
22
27
  attach_function :ps_get_search, [:decoder], :string
23
28
  attach_function :ps_set_search, [:decoder, :string], :int
29
+
30
+ typedef :pointer, :seg_iter
31
+
32
+ attach_function :ps_seg_iter, [:decoder, :pointer], :seg_iter
33
+ attach_function :ps_seg_next, [:seg_iter], :seg_iter
34
+ attach_function :ps_seg_word, [:seg_iter], :string
35
+ attach_function :ps_seg_frames, [:seg_iter, :pointer, :pointer], :void
36
+ attach_function :ps_seg_prob, [:seg_iter, :pointer, :pointer, :pointer], :int32
37
+ attach_function :ps_seg_free, [:seg_iter], :void
24
38
  end
25
39
  end
26
40
  end
@@ -6,7 +6,7 @@ module Pocketsphinx
6
6
  # @param [FFI::Pointer] buffer 16bit buffer of at least max_samples in size
7
7
  # @param [Fixnum] max_samples The maximum number of samples to read from the audio file
8
8
  # @return [Fixnum] Samples actually read; nil if EOF
9
- def read_audio(buffer, max_samples = 4096)
9
+ def read_audio(buffer, max_samples = 2048)
10
10
  if file.nil?
11
11
  raise "Can't read audio: use AudioFile#start_recording to open the file first"
12
12
  end
@@ -1,7 +1,7 @@
1
1
  module Pocketsphinx
2
2
  # High-level class for live speech recognition from a raw audio file.
3
3
  class AudioFileSpeechRecognizer < SpeechRecognizer
4
- def recognize(file_path, max_samples = 4096)
4
+ def recognize(file_path, max_samples = 2048)
5
5
  self.recordable = AudioFile.new(file_path)
6
6
 
7
7
  super(max_samples) do |speech|
@@ -1,5 +1,5 @@
1
1
  module Pocketsphinx
2
- class Decoder < Struct.new(:configuration)
2
+ class Decoder
3
3
  require 'delegate'
4
4
 
5
5
  include API::CallHelpers
@@ -13,7 +13,22 @@ module Pocketsphinx
13
13
  end
14
14
  end
15
15
 
16
+ Word = Struct.new(:word, :start_frame, :end_frame)
17
+
16
18
  attr_writer :ps_api
19
+ attr_accessor :configuration
20
+
21
+ # Initialize a Decoder
22
+ #
23
+ # Note that this initialization process actually updates the Configuration based on settings
24
+ # which are found in feat.params along with the acoustic model.
25
+ #
26
+ # @param [Configuration] configuration
27
+ # @param [FFI::Pointer] ps_decoder An optional Pocketsphinx decoder. One is initialized if not provided.
28
+ def initialize(configuration, ps_decoder = nil)
29
+ @configuration = configuration
30
+ init_decoder if ps_decoder.nil?
31
+ end
17
32
 
18
33
  # Reinitialize the decoder with updated configuration.
19
34
  #
@@ -108,6 +123,30 @@ module Pocketsphinx
108
123
  )
109
124
  end
110
125
 
126
+ # Get an array of words with start/end frame values (10msec/frame) for current hypothesis
127
+ #
128
+ # @return [Array] Array of words with start/end frame values (10msec/frame)
129
+ def words
130
+ mp_path_score = FFI::MemoryPointer.new(:int32, 1)
131
+ start_frame = FFI::MemoryPointer.new(:int32, 1)
132
+ end_frame = FFI::MemoryPointer.new(:int32, 1)
133
+
134
+ seg_iter = ps_api.ps_seg_iter(ps_decoder, mp_path_score)
135
+ words = []
136
+
137
+ until seg_iter.null? do
138
+ ps_api.ps_seg_frames(seg_iter, start_frame, end_frame)
139
+ words << Pocketsphinx::Decoder::Word.new(
140
+ ps_api.ps_seg_word(seg_iter),
141
+ start_frame.get_int32(0),
142
+ end_frame.get_int32(0)
143
+ )
144
+ seg_iter = ps_api.ps_seg_next(seg_iter)
145
+ end
146
+
147
+ words
148
+ end
149
+
111
150
  # Adds new search using JSGF model.
112
151
  #
113
152
  # Convenience method to parse JSGF model from string and create a search.
@@ -49,7 +49,7 @@ module Pocketsphinx
49
49
  # @param [Fixnum] max_samples The maximum number of samples to read from the audio device
50
50
  # @return [Fixnum] Samples actually read (could be 0 since non-blocking); nil if not
51
51
  # recording and no more samples remaining to be read from most recent recording.
52
- def read_audio(buffer, max_samples = 4096)
52
+ def read_audio(buffer, max_samples = 2048)
53
53
  samples = ps_api.ad_read(@ps_audio_device, buffer, max_samples)
54
54
  samples if samples >= 0
55
55
  end
@@ -60,7 +60,7 @@ module Pocketsphinx
60
60
  # we specify a delay which should fill half of the max buffer size
61
61
  #
62
62
  # @param [Fixnum] max_samples The maximum samples we tried to read from the audio device
63
- def read_audio_delay(max_samples = 4096)
63
+ def read_audio_delay(max_samples = 2048)
64
64
  max_samples.to_f / (2 * sample_rate)
65
65
  end
66
66
 
@@ -43,7 +43,7 @@ module Pocketsphinx
43
43
  # Recognize speech and yield hypotheses in infinite loop
44
44
  #
45
45
  # @param [Fixnum] max_samples Number of samples to process at a time
46
- def recognize(max_samples = 4096, &b)
46
+ def recognize(max_samples = 2048, &b)
47
47
  unless ALGORITHMS.include?(algorithm)
48
48
  raise NotImplementedError, "Unknown speech recognition algorithm: #{algorithm}"
49
49
  end
@@ -1,3 +1,3 @@
1
1
  module Pocketsphinx
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -65,7 +65,7 @@ describe Pocketsphinx::Configuration do
65
65
 
66
66
  describe '#setting_names' do
67
67
  it 'contains the names of all possible system settings' do
68
- expect(subject.setting_names.count).to eq(112)
68
+ expect(subject.setting_names.count).to eq(114)
69
69
  end
70
70
  end
71
71
 
@@ -84,7 +84,7 @@ describe Pocketsphinx::Configuration do
84
84
  it 'gives details for all settings when no name is specified' do
85
85
  details = subject.details
86
86
 
87
- expect(details.count).to eq(112)
87
+ expect(details.count).to eq(114)
88
88
  expect(details.first).to eq({
89
89
  name: "agc",
90
90
  type: :string,
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Pocketsphinx::Decoder do
4
- subject { Pocketsphinx::Decoder.new(configuration) }
4
+ subject { Pocketsphinx::Decoder.new(configuration, ps_decoder) }
5
5
  let(:ps_api) { subject.ps_api }
6
6
  let(:ps_decoder) { double }
7
7
  let(:configuration) { Pocketsphinx::Configuration.default }
@@ -11,6 +11,17 @@ describe Pocketsphinx::Decoder do
11
11
  allow(ps_api).to receive(:ps_init).and_return(ps_decoder)
12
12
  end
13
13
 
14
+ describe 'initialization' do
15
+ it 'initializes the underlying Pocketsphinx decoder when one is not provided' do
16
+ expect(Pocketsphinx::API::Pocketsphinx)
17
+ .to receive(:ps_init)
18
+ .with(configuration.ps_config)
19
+ .and_return(ps_decoder)
20
+
21
+ Pocketsphinx::Decoder.new(configuration)
22
+ end
23
+ end
24
+
14
25
  describe '#reconfigure' do
15
26
  it 'calls libpocketsphinx and the configuration post initialize hook' do
16
27
  expect(ps_api)
@@ -53,24 +64,24 @@ describe Pocketsphinx::Decoder do
53
64
 
54
65
  describe '#process_raw' do
55
66
  it 'calls libpocketsphinx' do
56
- FFI::MemoryPointer.new(:int16, 4096) do |buffer|
67
+ FFI::MemoryPointer.new(:int16, 2048) do |buffer|
57
68
  expect(ps_api)
58
69
  .to receive(:ps_process_raw)
59
- .with(subject.ps_decoder, buffer, 4096, 0, 0)
70
+ .with(subject.ps_decoder, buffer, 2048, 0, 0)
60
71
  .and_return(0)
61
72
 
62
- subject.process_raw(buffer, 4096, false, false)
73
+ subject.process_raw(buffer, 2048, false, false)
63
74
  end
64
75
  end
65
76
 
66
77
  it 'raises an exception on error' do
67
- FFI::MemoryPointer.new(:int16, 4096) do |buffer|
78
+ FFI::MemoryPointer.new(:int16, 2048) do |buffer|
68
79
  expect(ps_api)
69
80
  .to receive(:ps_process_raw)
70
- .with(subject.ps_decoder, buffer, 4096, 0, 0)
81
+ .with(subject.ps_decoder, buffer, 2048, 0, 0)
71
82
  .and_return(-1)
72
83
 
73
- expect { subject.process_raw(buffer, 4096, false, false) }
84
+ expect { subject.process_raw(buffer, 2048, false, false) }
74
85
  .to raise_exception "Decoder#process_raw failed with error code -1"
75
86
  end
76
87
  end
@@ -148,6 +159,35 @@ describe Pocketsphinx::Decoder do
148
159
  end
149
160
  end
150
161
 
162
+ describe '#words' do
163
+ let(:iterator) { FFI::MemoryPointer.from_string("") }
164
+
165
+ it 'calls libpocketsphinx' do
166
+ expect(ps_api).to receive(:ps_seg_iter).ordered.and_return(iterator)
167
+
168
+ expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame|
169
+ start_frame.put_int16(0, 10)
170
+ end_frame.put_int16(0, 20)
171
+ end
172
+
173
+ expect(ps_api).to receive(:ps_seg_word).ordered.and_return("one")
174
+ expect(ps_api).to receive(:ps_seg_next).ordered.and_return(iterator)
175
+
176
+ expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame|
177
+ start_frame.put_int16(0, 30)
178
+ end_frame.put_int16(0, 40)
179
+ end
180
+
181
+ expect(ps_api).to receive(:ps_seg_word).ordered.and_return("two")
182
+ expect(ps_api).to receive(:ps_seg_next).ordered.and_return(FFI::Pointer::NULL)
183
+
184
+ words = subject.words
185
+
186
+ expect(words[0]).to eq(Pocketsphinx::Decoder::Word.new("one", 10, 20))
187
+ expect(words[1]).to eq(Pocketsphinx::Decoder::Word.new("two", 30, 40))
188
+ end
189
+ end
190
+
151
191
  describe '#set_jsgf_string' do
152
192
  it 'calls libpocketsphinx' do
153
193
  expect(ps_api)
@@ -5,22 +5,44 @@ describe Pocketsphinx::Decoder do
5
5
  let(:configuration) { @configuration }
6
6
 
7
7
  # Share decoder across all examples for speed
8
- before :all do
8
+ before do
9
9
  @configuration = Pocketsphinx::Configuration.default
10
10
  @decoder = Pocketsphinx::Decoder.new(@configuration)
11
11
  end
12
12
 
13
+ it 'reads cmninit configuration values from default acoustic model feat.params' do
14
+ expect(configuration.details('cmninit')[:default]).to eq("8.0")
15
+ expect(configuration.details('cmninit')[:value]).to eq("40,3,-1")
16
+ end
17
+
13
18
  describe '#decode' do
14
19
  it 'correctly decodes the speech in goforward.raw' do
15
- @decoder.ps_api = nil
16
20
  subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
17
-
18
21
  expect(subject.hypothesis).to eq("go forward ten meters")
19
22
  end
20
23
 
24
+ # FIXME: This test illustrates a current issue discussed in:
25
+ # https://github.com/watsonbox/pocketsphinx-ruby/issues/10
26
+ it 'incorrectly decodes the speech in hello.wav on first attempt' do
27
+ hypotheses = (1..2).map do
28
+ subject.decode File.open('spec/assets/audio/hello.wav', 'rb')
29
+ subject.hypothesis
30
+ end
31
+
32
+ expect(hypotheses).to eq(['oh', 'hello'])
33
+ end
34
+
21
35
  it 'accepts a file path as well as a stream' do
22
36
  subject.decode 'spec/assets/audio/goforward.raw'
23
37
  expect(subject.hypothesis).to eq("go forward ten meters")
24
38
  end
39
+
40
+ it 'reports words with start/end frame values' do
41
+ subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
42
+
43
+ expect(subject.words.map(&:word)).to eq(["<s>", "go", "forward", "ten", "meters", "</s>"])
44
+ expect(subject.words.map(&:start_frame)).to eq([0, 46, 64, 117, 153, 212])
45
+ expect(subject.words.map(&:end_frame)).to eq([45, 63, 116, 152, 211, 260])
46
+ end
25
47
  end
26
48
  end
@@ -14,7 +14,7 @@ describe 'speech recognition with default configuration' do
14
14
 
15
15
  describe '#recognize' do
16
16
  it 'should decode speech in raw audio' do
17
- expect { |b| subject.recognize('spec/assets/audio/goforward.raw', 4096, &b) }.
17
+ expect { |b| subject.recognize('spec/assets/audio/goforward.raw', 2048, &b) }.
18
18
  to yield_with_args("go forward ten meters")
19
19
  end
20
20
  end
@@ -18,7 +18,7 @@ describe 'speech recognition with a grammar' do
18
18
 
19
19
  describe '#recognize' do
20
20
  it 'should decode speech in raw audio' do
21
- expect { |b| subject.recognize(4096, &b) }.to yield_with_args("go forward ten meters")
21
+ expect { |b| subject.recognize(2048, &b) }.to yield_with_args("go forward ten meters")
22
22
  end
23
23
  end
24
24
  end
@@ -19,7 +19,7 @@ describe 'keyword spotting' do
19
19
 
20
20
  describe '#recognize' do
21
21
  it 'should decode speech in raw audio' do
22
- expect { |b| subject.recognize(4096, &b) }.to yield_with_args('forward')
22
+ expect { |b| subject.recognize(2048, &b) }.to yield_with_args('forward')
23
23
  end
24
24
  end
25
25
  end
@@ -72,16 +72,16 @@ describe Pocketsphinx::Microphone do
72
72
  it 'calls libsphinxad' do
73
73
  expect(ps_api)
74
74
  .to receive(:ad_read)
75
- .with(subject.ps_audio_device, :buffer, 4096)
75
+ .with(subject.ps_audio_device, :buffer, 2048)
76
76
  .and_return(0)
77
77
 
78
- subject.read_audio(:buffer, 4096)
78
+ subject.read_audio(:buffer, 2048)
79
79
  end
80
80
  end
81
81
 
82
82
  describe '#read_audio_delay' do
83
- it 'should be 0.128 seconds for a max_samples of 4096 and sample rate of 16kHz' do
84
- expect(subject.read_audio_delay(4096)).to eq(0.128)
83
+ it 'should be 0.064 seconds for a max_samples of 2048 and sample rate of 16kHz' do
84
+ expect(subject.read_audio_delay(2048)).to eq(0.064)
85
85
  end
86
86
  end
87
87
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pocketsphinx-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Howard Wilson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-03 00:00:00.000000000 Z
11
+ date: 2015-04-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -122,6 +122,7 @@ files:
122
122
  - lib/pocketsphinx/version.rb
123
123
  - pocketsphinx-ruby.gemspec
124
124
  - spec/assets/audio/goforward.raw
125
+ - spec/assets/audio/hello.wav
125
126
  - spec/assets/grammars/goforward.gram
126
127
  - spec/assets/grammars/invalid.gram
127
128
  - spec/assets/grammars/sentences.gram
@@ -162,6 +163,7 @@ specification_version: 4
162
163
  summary: Ruby speech recognition with Pocketsphinx
163
164
  test_files:
164
165
  - spec/assets/audio/goforward.raw
166
+ - spec/assets/audio/hello.wav
165
167
  - spec/assets/grammars/goforward.gram
166
168
  - spec/assets/grammars/invalid.gram
167
169
  - spec/assets/grammars/sentences.gram