pocketsphinx-ruby 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +12 -2
- data/lib/pocketsphinx/api/pocketsphinx.rb +3 -3
- data/lib/pocketsphinx/decoder.rb +7 -11
- data/lib/pocketsphinx/version.rb +1 -1
- data/spec/configuration_spec.rb +2 -2
- data/spec/decoder_spec.rb +5 -8
- data/spec/integration/decoder_spec.rb +2 -4
- data/spec/integration/default_recognition_spec.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b38998dd59c577300db4b63bfda52e037c49409
|
4
|
+
data.tar.gz: 82dd10ee0f739d6459013513ae986d6e60124dfd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d11e77f70106d9beaab587e7c8372dc217a9e511e7058e6d4f0a55fb4ce040b5ad50ef203a9755324ebaccc6fe919919eb04e5bb4770a290413b9990cecbfec6
|
7
|
+
data.tar.gz: fbea33173c9c8bdca4458c2dcb1f88e5643be169d8a54f4ed3baf593677f790d754e0f58a1c37ca527cbfeeeb9c0b19b28461afe417da509c2073543c33522ee
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -139,14 +139,14 @@ puts decoder.hypothesis # => "go forward ten years"
|
|
139
139
|
|
140
140
|
### Keyword Spotting
|
141
141
|
|
142
|
-
Keyword spotting is another feature that is not in the current stable (0.8) releases of Pocketsphinx, having been [merged into trunk](https://github.com/cmusphinx/pocketsphinx/commit/f562f9356cc7f1ade4941ebdde0c377642a023e3) early in 2014.
|
142
|
+
Keyword spotting is another feature that is not in the current stable (0.8) releases of Pocketsphinx, having been [merged into trunk](https://github.com/cmusphinx/pocketsphinx/commit/f562f9356cc7f1ade4941ebdde0c377642a023e3) early in 2014. It can be useful for detecting an activation keyword in a command and control application, while ignoring all other speech. Set up a recognizer as follows:
|
143
143
|
|
144
144
|
```ruby
|
145
145
|
configuration = Pocketsphinx::Configuration::KeywordSpotting.new('Okay computer')
|
146
146
|
recognizer = Pocketsphinx::LiveSpeechRecognizer.new(configuration)
|
147
147
|
```
|
148
148
|
|
149
|
-
The `KeywordSpotting` configuration accepts a second argument for adjusting the sensitivity of the keyword detection. Note that this is just a wrapper which sets the `keyphrase` and `kws_threshold` settings on the default configuration:
|
149
|
+
The `KeywordSpotting` configuration accepts a second argument for adjusting the sensitivity of the keyword detection. Note that this is just a wrapper which sets the `keyphrase` and `kws_threshold` settings on the default configuration, and removes the language model:
|
150
150
|
|
151
151
|
```ruby
|
152
152
|
Pocketsphinx::Configuration::KeywordSpotting.new('keyword', 2).changes
|
@@ -177,9 +177,19 @@ configuration = Pocketsphinx::Configuration::Grammar.new do
|
|
177
177
|
end
|
178
178
|
```
|
179
179
|
|
180
|
+
## Recognition Accuracy and Training
|
181
|
+
|
182
|
+
See the CMU Sphinx resources on [training](http://cmusphinx.sourceforge.net/wiki/tutorialam) and [adapting](http://cmusphinx.sourceforge.net/wiki/tutorialadapt) acoustic models for more information.
|
183
|
+
|
184
|
+
[Peter Grasch](http://grasch.net/), author of [Simon](https://simon.kde.org/), has also made a number of interesting posts on the [state of open source speech recognition](http://grasch.net/node/19), as well as on improving [language](http://grasch.net/node/20) and [acoustic](http://grasch.net/node/21) models.
|
185
|
+
|
186
|
+
See [`sphinxtrain-ruby`](https://github.com/watsonbox/sphinxtrain-ruby) for an experimental toolkit for training/adapting CMU Sphinx acoustic models. Its main goal is to help with adapting existing acoustic models to a specific speaker/accent.
|
187
|
+
|
180
188
|
|
181
189
|
## Troubleshooting
|
182
190
|
|
191
|
+
First and foremost, because this gem **depends on development versions** of CMU Sphinx packages, there will be times when errors are caused by API changes or bugs in those packages. Unfortunately, until up-to-date releases of those packages are made, this is going to happen from time to time, so please do open an issue with as much detail as you have.
|
192
|
+
|
183
193
|
This gem has been tested with a manual Pocketsphinx installation on Ubuntu 14.04 and a Homebrew Pocketsphinx installation on OSX 10.9.4 Mavericks. Take a look at the following common problems before opening an issue.
|
184
194
|
|
185
195
|
**`attach_function': Function 'ps_default_search_args' not found in [libpocketsphinx.so] (FFI::NotFoundError)**
|
@@ -11,12 +11,12 @@ module Pocketsphinx
|
|
11
11
|
attach_function :ps_reinit, [:decoder, :configuration], :int
|
12
12
|
attach_function :ps_default_search_args, [:pointer], :void
|
13
13
|
attach_function :ps_args, [], :pointer
|
14
|
-
attach_function :ps_decode_raw, [:decoder, :pointer, :
|
14
|
+
attach_function :ps_decode_raw, [:decoder, :pointer, :long], :int
|
15
15
|
attach_function :ps_process_raw, [:decoder, :pointer, :size_t, :int, :int], :int
|
16
|
-
attach_function :ps_start_utt, [:decoder
|
16
|
+
attach_function :ps_start_utt, [:decoder], :int
|
17
17
|
attach_function :ps_end_utt, [:decoder], :int
|
18
18
|
attach_function :ps_get_in_speech, [:decoder], :uint8
|
19
|
-
attach_function :ps_get_hyp, [:decoder, :pointer
|
19
|
+
attach_function :ps_get_hyp, [:decoder, :pointer], :string
|
20
20
|
attach_function :ps_set_jsgf_string, [:decoder, :string, :string], :int
|
21
21
|
attach_function :ps_unset_search, [:decoder, :string], :int
|
22
22
|
attach_function :ps_get_search, [:decoder], :string
|
data/lib/pocketsphinx/decoder.rb
CHANGED
@@ -5,10 +5,10 @@ module Pocketsphinx
|
|
5
5
|
include API::CallHelpers
|
6
6
|
|
7
7
|
class Hypothesis < SimpleDelegator
|
8
|
-
attr_accessor :path_score
|
8
|
+
attr_accessor :path_score
|
9
9
|
|
10
|
-
def initialize(string, path_score
|
11
|
-
@path_score
|
10
|
+
def initialize(string, path_score)
|
11
|
+
@path_score = path_score
|
12
12
|
super(string)
|
13
13
|
end
|
14
14
|
end
|
@@ -80,10 +80,8 @@ module Pocketsphinx
|
|
80
80
|
# This function should be called before any utterance data is passed
|
81
81
|
# to the decoder. It marks the start of a new utterance and
|
82
82
|
# reinitializes internal data structures.
|
83
|
-
|
84
|
-
|
85
|
-
def start_utterance(name = nil)
|
86
|
-
api_call :ps_start_utt, ps_decoder, name
|
83
|
+
def start_utterance
|
84
|
+
api_call :ps_start_utt, ps_decoder
|
87
85
|
end
|
88
86
|
|
89
87
|
# End utterance processing
|
@@ -101,14 +99,12 @@ module Pocketsphinx
|
|
101
99
|
# @return [Hypothesis] Hypothesis (behaves like a string)
|
102
100
|
def hypothesis
|
103
101
|
mp_path_score = FFI::MemoryPointer.new(:int32, 1)
|
104
|
-
mp_utterance_id = FFI::MemoryPointer.new(:pointer, 1)
|
105
102
|
|
106
|
-
hypothesis = ps_api.ps_get_hyp(ps_decoder, mp_path_score
|
103
|
+
hypothesis = ps_api.ps_get_hyp(ps_decoder, mp_path_score)
|
107
104
|
|
108
105
|
hypothesis.nil? ? nil : Hypothesis.new(
|
109
106
|
hypothesis,
|
110
|
-
mp_path_score.get_int32(0)
|
111
|
-
mp_utterance_id.read_pointer.read_string.force_encoding('UTF-8')
|
107
|
+
mp_path_score.get_int32(0)
|
112
108
|
)
|
113
109
|
end
|
114
110
|
|
data/lib/pocketsphinx/version.rb
CHANGED
data/spec/configuration_spec.rb
CHANGED
@@ -65,7 +65,7 @@ describe Pocketsphinx::Configuration do
|
|
65
65
|
|
66
66
|
describe '#setting_names' do
|
67
67
|
it 'contains the names of all possible system settings' do
|
68
|
-
expect(subject.setting_names.count).to eq(
|
68
|
+
expect(subject.setting_names.count).to eq(112)
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
@@ -84,7 +84,7 @@ describe Pocketsphinx::Configuration do
|
|
84
84
|
it 'gives details for all settings when no name is specified' do
|
85
85
|
details = subject.details
|
86
86
|
|
87
|
-
expect(details.count).to eq(
|
87
|
+
expect(details.count).to eq(112)
|
88
88
|
expect(details.first).to eq({
|
89
89
|
name: "agc",
|
90
90
|
type: :string,
|
data/spec/decoder_spec.rb
CHANGED
@@ -80,19 +80,19 @@ describe Pocketsphinx::Decoder do
|
|
80
80
|
it 'calls libpocketsphinx' do
|
81
81
|
expect(ps_api)
|
82
82
|
.to receive(:ps_start_utt)
|
83
|
-
.with(subject.ps_decoder
|
83
|
+
.with(subject.ps_decoder)
|
84
84
|
.and_return(0)
|
85
85
|
|
86
|
-
subject.start_utterance
|
86
|
+
subject.start_utterance
|
87
87
|
end
|
88
88
|
|
89
89
|
it 'raises an exception on error' do
|
90
90
|
expect(ps_api)
|
91
91
|
.to receive(:ps_start_utt)
|
92
|
-
.with(subject.ps_decoder
|
92
|
+
.with(subject.ps_decoder)
|
93
93
|
.and_return(-1)
|
94
94
|
|
95
|
-
expect { subject.start_utterance
|
95
|
+
expect { subject.start_utterance }
|
96
96
|
.to raise_exception "Decoder#start_utterance failed with error code -1"
|
97
97
|
end
|
98
98
|
end
|
@@ -132,13 +132,11 @@ describe Pocketsphinx::Decoder do
|
|
132
132
|
describe '#hypothesis' do
|
133
133
|
it 'calls libpocketsphinx' do
|
134
134
|
expect(ps_api)
|
135
|
-
.to receive(:ps_get_hyp) do |ps_decoder, mp_path_score
|
135
|
+
.to receive(:ps_get_hyp) do |ps_decoder, mp_path_score|
|
136
136
|
expect(ps_decoder).to eq(subject.ps_decoder)
|
137
137
|
expect(mp_path_score).to be_a(FFI::MemoryPointer)
|
138
|
-
expect(mp_utterance_id).to be_a(FFI::MemoryPointer)
|
139
138
|
|
140
139
|
mp_path_score.put_int32(0, 20)
|
141
|
-
mp_utterance_id.write_pointer(FFI::MemoryPointer.from_string("Utterance"))
|
142
140
|
|
143
141
|
"Hypothesis"
|
144
142
|
end
|
@@ -147,7 +145,6 @@ describe Pocketsphinx::Decoder do
|
|
147
145
|
|
148
146
|
expect(hypothesis).to eq("Hypothesis")
|
149
147
|
expect(hypothesis.path_score).to eq(20)
|
150
|
-
expect(hypothesis.utterance_id).to eq("Utterance")
|
151
148
|
end
|
152
149
|
end
|
153
150
|
|
@@ -15,14 +15,12 @@ describe Pocketsphinx::Decoder do
|
|
15
15
|
@decoder.ps_api = nil
|
16
16
|
subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
|
17
17
|
|
18
|
-
|
19
|
-
# get this quite right, but nonetheless this is the expected output
|
20
|
-
expect(subject.hypothesis).to eq("go forward ten years")
|
18
|
+
expect(subject.hypothesis).to eq("go forward ten meters")
|
21
19
|
end
|
22
20
|
|
23
21
|
it 'accepts a file path as well as a stream' do
|
24
22
|
subject.decode 'spec/assets/audio/goforward.raw'
|
25
|
-
expect(subject.hypothesis).to eq("go forward ten
|
23
|
+
expect(subject.hypothesis).to eq("go forward ten meters")
|
26
24
|
end
|
27
25
|
end
|
28
26
|
end
|
@@ -15,7 +15,7 @@ describe 'speech recognition with default configuration' do
|
|
15
15
|
describe '#recognize' do
|
16
16
|
it 'should decode speech in raw audio' do
|
17
17
|
expect { |b| subject.recognize('spec/assets/audio/goforward.raw', 4096, &b) }.
|
18
|
-
to yield_with_args("go forward ten
|
18
|
+
to yield_with_args("go forward ten meters")
|
19
19
|
end
|
20
20
|
end
|
21
21
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pocketsphinx-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Howard Wilson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|