pihsi 0.0.8 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/pocket_sphinx/pocket_sphinx.c +27 -31
- data/lib/pihsi/version.rb +1 -1
- data/lib/pihsi.rb +1 -1
- metadata +2 -3
- data/lib/pihsi/speech_recognizer.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d949dd5f7e6d5f382c6c74a44f3678fbfe0a8fa8
|
4
|
+
data.tar.gz: ed8b316d1c8ffb11f954a57a0389d787079acd70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b09454ee9df43e8aac32714dcaf0a9c11fa7a56f5ace89730cd817fdba4115a4cbca1199ce4ef04e30f3f82ae6727bb7ebf037be84a0d02a6fb3c8ec0a2c7b9
|
7
|
+
data.tar.gz: 31ccc59f39331163b8fbeb290e7ee2db0e2ded8e40f9b77a5046610cbbdb5f79d34d40b5cf3fdc660aca5842699896d6e43a2b7ba48db23cb9db293b6174a694
|
data/README.md
CHANGED
@@ -29,7 +29,7 @@ Or install it yourself as:
|
|
29
29
|
Initialize Pihsi::SpeechRecognizer with proper [hmm, lm and dict](http://cmusphinx.sourceforge.net/wiki/tutorialpocketsphinx#initialization):
|
30
30
|
|
31
31
|
```ruby
|
32
|
-
recognizer = Pihsi::SpeechRecognizer.new
|
32
|
+
recognizer = Pihsi::SpeechRecognizer.new hmm: 'xxx', lm: 'yyy', dict: 'zzz'
|
33
33
|
```
|
34
34
|
|
35
35
|
Recognize a string read from your audio file:
|
data/ext/pocket_sphinx/pocket_sphinx.c
CHANGED
@@ -5,50 +5,40 @@ typedef struct ps {
|
|
5
5
|
ps_decoder_t *decoder;
|
6
6
|
} PocketSphinx;
|
7
7
|
|
8
|
-
|
8
|
+
static VALUE rb_eUtteranceError;
|
9
|
+
|
10
|
+
/* Converts raw audio data into text.
|
11
|
+
*
|
12
|
+
* @param data [String] the raw audio data
|
13
|
+
* @return [String, nil] the transcribed text or nil
|
14
|
+
*/
|
15
|
+
VALUE recognize(VALUE self, VALUE data) {
|
9
16
|
char const *hyp, *uttid;
|
10
17
|
int rv;
|
11
18
|
int32 score;
|
12
19
|
ps_decoder_t *ps;
|
13
20
|
PocketSphinx *pocketSphinx;
|
14
|
-
VALUE result = rb_hash_new();
|
15
21
|
|
16
22
|
Data_Get_Struct(self, PocketSphinx, pocketSphinx);
|
17
23
|
ps = pocketSphinx -> decoder;
|
18
24
|
|
19
|
-
int data_length = RARRAY_LEN(data);
|
20
|
-
int16 *c_data = malloc(sizeof(int16) * data_length);
|
21
|
-
|
22
|
-
int i;
|
23
|
-
for(i = 0; i < data_length; i++) {
|
24
|
-
c_data[i] = NUM2INT(rb_ary_entry(data, i)); // Rubinius does not support NUM2SHORT();
|
25
|
-
}
|
26
|
-
|
27
25
|
rv = ps_start_utt(ps, "goforward");
|
28
26
|
|
29
|
-
if (rv < 0)
|
30
|
-
|
31
|
-
rb_raise(rb_eStandardError, "cannot start utterance");
|
32
|
-
}
|
27
|
+
if (rv < 0)
|
28
|
+
rb_raise(rb_eUtteranceError, "cannot start utterance");
|
33
29
|
|
34
|
-
rv = ps_process_raw(ps, c_data, data_length, FALSE, FALSE);
|
30
|
+
rv = ps_process_raw(ps, (int16 *) RSTRING_PTR(data), RSTRING_LEN(data) / 2, FALSE, FALSE);
|
35
31
|
rv = ps_end_utt(ps);
|
36
32
|
|
37
|
-
if (rv < 0)
|
38
|
-
|
39
|
-
rb_raise(rb_eStandardError, "cannot end utterance");
|
40
|
-
}
|
33
|
+
if (rv < 0)
|
34
|
+
rb_raise(rb_eUtteranceError, "cannot end utterance");
|
41
35
|
|
42
36
|
hyp = ps_get_hyp(ps, &score, &uttid);
|
43
37
|
|
44
|
-
free(c_data);
|
45
|
-
|
46
38
|
if (hyp == NULL) {
|
47
39
|
return Qnil;
|
48
40
|
} else {
|
49
|
-
|
50
|
-
rb_hash_aset(result, rb_str_new2("hypothesis"), rb_str_new2(hyp));
|
51
|
-
return result;
|
41
|
+
return rb_str_new2(hyp);
|
52
42
|
}
|
53
43
|
}
|
54
44
|
|
@@ -63,16 +53,22 @@ static VALUE allocate(VALUE self) {
|
|
63
53
|
return Data_Make_Struct(self, PocketSphinx, 0, deallocate, ps);
|
64
54
|
}
|
65
55
|
|
66
|
-
|
56
|
+
/*
|
57
|
+
* @param options [Hash] PocketSphinx configuration (e.g. hmm)
|
58
|
+
*/
|
59
|
+
VALUE initialize(VALUE self, VALUE options_hash) {
|
67
60
|
int i;
|
68
61
|
PocketSphinx *ps;
|
69
62
|
VALUE option, key, value;
|
70
63
|
char *c_key;
|
71
64
|
cmd_ln_t *config = cmd_ln_init(NULL, ps_args(), TRUE, "-logfn", "/dev/null", NULL);
|
72
65
|
|
66
|
+
VALUE options = rb_funcall(options_hash, rb_intern("to_a"), 0);
|
67
|
+
|
73
68
|
for (i = 0; i < RARRAY_LEN(options); i++) {
|
74
69
|
option = rb_ary_entry(options, i);
|
75
|
-
key = rb_ary_entry(option, 0);
|
70
|
+
key = rb_funcall(rb_ary_entry(option, 0), rb_intern("to_s"), 0);
|
71
|
+
key = rb_funcall(key, rb_intern("prepend"), 1, rb_str_new2("-"));
|
76
72
|
c_key = RSTRING_PTR(key);
|
77
73
|
value = rb_ary_entry(option, 1);
|
78
74
|
|
@@ -95,9 +91,9 @@ VALUE initialize(VALUE self, VALUE options) {
|
|
95
91
|
|
96
92
|
void Init_pocket_sphinx() {
|
97
93
|
VALUE rb_mPihsi = rb_define_module("Pihsi");
|
98
|
-
VALUE
|
99
|
-
|
100
|
-
|
101
|
-
rb_define_method(
|
102
|
-
|
94
|
+
VALUE rb_cSpeechRecognizer = rb_define_class_under(rb_mPihsi, "SpeechRecognizer", rb_cObject);
|
95
|
+
rb_define_alloc_func(rb_cSpeechRecognizer, allocate);
|
96
|
+
rb_define_method(rb_cSpeechRecognizer, "initialize", initialize, 1);
|
97
|
+
rb_define_method(rb_cSpeechRecognizer, "recognize", recognize, 1);
|
98
|
+
rb_eUtteranceError = rb_define_class_under(rb_mPihsi, "UtteranceError", rb_eStandardError);
|
103
99
|
}
|
data/lib/pihsi/version.rb
CHANGED
data/lib/pihsi.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pihsi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zhi-Qiang Lei
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,7 +112,6 @@ files:
|
|
112
112
|
- ext/pocket_sphinx/extconf.rb
|
113
113
|
- ext/pocket_sphinx/pocket_sphinx.c
|
114
114
|
- lib/pihsi.rb
|
115
|
-
- lib/pihsi/speech_recognizer.rb
|
116
115
|
- lib/pihsi/version.rb
|
117
116
|
- pihsi.gemspec
|
118
117
|
- spec/factories/speech_recognizer.rb
|
data/lib/pihsi/speech_recognizer.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require "pocket_sphinx/pocket_sphinx"
|
2
|
-
|
3
|
-
module Pihsi
|
4
|
-
class SpeechRecognizer
|
5
|
-
attr_reader :decoder
|
6
|
-
|
7
|
-
def initialize(options = {})
|
8
|
-
_options = options.inject([]) do |result, (key, value)|
|
9
|
-
result << ["-#{key}", value] unless value.nil?
|
10
|
-
result
|
11
|
-
end
|
12
|
-
@decoder = PocketSphinx::Decoder.new(_options)
|
13
|
-
end
|
14
|
-
|
15
|
-
# Converts raw audio data into text
|
16
|
-
# @param data [String] the raw audio data
|
17
|
-
# @return [String, nil] the transcribed text or nil
|
18
|
-
def recognize(data)
|
19
|
-
if result = decoder.decode(data.unpack('s*'))
|
20
|
-
result["hypothesis"]
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|