pihsi 0.0.8 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/pocket_sphinx/pocket_sphinx.c +27 -31
- data/lib/pihsi/version.rb +1 -1
- data/lib/pihsi.rb +1 -1
- metadata +2 -3
- data/lib/pihsi/speech_recognizer.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d949dd5f7e6d5f382c6c74a44f3678fbfe0a8fa8
|
4
|
+
data.tar.gz: ed8b316d1c8ffb11f954a57a0389d787079acd70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b09454ee9df43e8aac32714dcaf0a9c11fa7a56f5ace89730cd817fdba4115a4cbca1199ce4ef04e30f3f82ae6727bb7ebf037be84a0d02a6fb3c8ec0a2c7b9
|
7
|
+
data.tar.gz: 31ccc59f39331163b8fbeb290e7ee2db0e2ded8e40f9b77a5046610cbbdb5f79d34d40b5cf3fdc660aca5842699896d6e43a2b7ba48db23cb9db293b6174a694
|
data/README.md
CHANGED
@@ -29,7 +29,7 @@ Or install it yourself as:
|
|
29
29
|
Initialize Pihsi::SpeechRecognizer with proper [hmm, lm and dict](http://cmusphinx.sourceforge.net/wiki/tutorialpocketsphinx#initialization):
|
30
30
|
|
31
31
|
```ruby
|
32
|
-
recognizer = Pihsi::SpeechRecognizer.new
|
32
|
+
recognizer = Pihsi::SpeechRecognizer.new hmm: 'xxx', lm: 'yyy', dict: 'zzz'
|
33
33
|
```
|
34
34
|
|
35
35
|
Recognize a string read from your audio file:
|
data/ext/pocket_sphinx/pocket_sphinx.c
CHANGED
@@ -5,50 +5,40 @@ typedef struct ps {
|
|
5
5
|
ps_decoder_t *decoder;
|
6
6
|
} PocketSphinx;
|
7
7
|
|
8
|
-
|
8
|
+
static VALUE rb_eUtteranceError;
|
9
|
+
|
10
|
+
/* Converts raw audio data into text.
|
11
|
+
*
|
12
|
+
* @param data [String] the raw audio data
|
13
|
+
* @return [String, nil] the transcribed text or nil
|
14
|
+
*/
|
15
|
+
VALUE recognize(VALUE self, VALUE data) {
|
9
16
|
char const *hyp, *uttid;
|
10
17
|
int rv;
|
11
18
|
int32 score;
|
12
19
|
ps_decoder_t *ps;
|
13
20
|
PocketSphinx *pocketSphinx;
|
14
|
-
VALUE result = rb_hash_new();
|
15
21
|
|
16
22
|
Data_Get_Struct(self, PocketSphinx, pocketSphinx);
|
17
23
|
ps = pocketSphinx -> decoder;
|
18
24
|
|
19
|
-
int data_length = RARRAY_LEN(data);
|
20
|
-
int16 *c_data = malloc(sizeof(int16) * data_length);
|
21
|
-
|
22
|
-
int i;
|
23
|
-
for(i = 0; i < data_length; i++) {
|
24
|
-
c_data[i] = NUM2INT(rb_ary_entry(data, i)); // Rubinius does not support NUM2SHORT();
|
25
|
-
}
|
26
|
-
|
27
25
|
rv = ps_start_utt(ps, "goforward");
|
28
26
|
|
29
|
-
if (rv < 0)
|
30
|
-
|
31
|
-
rb_raise(rb_eStandardError, "cannot start utterance");
|
32
|
-
}
|
27
|
+
if (rv < 0)
|
28
|
+
rb_raise(rb_eUtteranceError, "cannot start utterance");
|
33
29
|
|
34
|
-
rv = ps_process_raw(ps,
|
30
|
+
rv = ps_process_raw(ps, (int16 *) RSTRING_PTR(data), RSTRING_LEN(data) / 2, FALSE, FALSE);
|
35
31
|
rv = ps_end_utt(ps);
|
36
32
|
|
37
|
-
if (rv < 0)
|
38
|
-
|
39
|
-
rb_raise(rb_eStandardError, "cannot end utterance");
|
40
|
-
}
|
33
|
+
if (rv < 0)
|
34
|
+
rb_raise(rb_eUtteranceError, "cannot end utterance");
|
41
35
|
|
42
36
|
hyp = ps_get_hyp(ps, &score, &uttid);
|
43
37
|
|
44
|
-
free(c_data);
|
45
|
-
|
46
38
|
if (hyp == NULL) {
|
47
39
|
return Qnil;
|
48
40
|
} else {
|
49
|
-
|
50
|
-
rb_hash_aset(result, rb_str_new2("hypothesis"), rb_str_new2(hyp));
|
51
|
-
return result;
|
41
|
+
return rb_str_new2(hyp);
|
52
42
|
}
|
53
43
|
}
|
54
44
|
|
@@ -63,16 +53,22 @@ static VALUE allocate(VALUE self) {
|
|
63
53
|
return Data_Make_Struct(self, PocketSphinx, 0, deallocate, ps);
|
64
54
|
}
|
65
55
|
|
66
|
-
|
56
|
+
/*
|
57
|
+
* @param options [Hash] PocketSphinx configuration (e.g. hmm)
|
58
|
+
*/
|
59
|
+
VALUE initialize(VALUE self, VALUE options_hash) {
|
67
60
|
int i;
|
68
61
|
PocketSphinx *ps;
|
69
62
|
VALUE option, key, value;
|
70
63
|
char *c_key;
|
71
64
|
cmd_ln_t *config = cmd_ln_init(NULL, ps_args(), TRUE, "-logfn", "/dev/null", NULL);
|
72
65
|
|
66
|
+
VALUE options = rb_funcall(options_hash, rb_intern("to_a"), 0);
|
67
|
+
|
73
68
|
for (i = 0; i < RARRAY_LEN(options); i++) {
|
74
69
|
option = rb_ary_entry(options, i);
|
75
|
-
key = rb_ary_entry(option, 0);
|
70
|
+
key = rb_funcall(rb_ary_entry(option, 0), rb_intern("to_s"), 0);
|
71
|
+
key = rb_funcall(key, rb_intern("prepend"), 1, rb_str_new2("-"));
|
76
72
|
c_key = RSTRING_PTR(key);
|
77
73
|
value = rb_ary_entry(option, 1);
|
78
74
|
|
@@ -95,9 +91,9 @@ VALUE initialize(VALUE self, VALUE options) {
|
|
95
91
|
|
96
92
|
void Init_pocket_sphinx() {
|
97
93
|
VALUE rb_mPihsi = rb_define_module("Pihsi");
|
98
|
-
VALUE
|
99
|
-
|
100
|
-
|
101
|
-
rb_define_method(
|
102
|
-
|
94
|
+
VALUE rb_cSpeechRecognizer = rb_define_class_under(rb_mPihsi, "SpeechRecognizer", rb_cObject);
|
95
|
+
rb_define_alloc_func(rb_cSpeechRecognizer, allocate);
|
96
|
+
rb_define_method(rb_cSpeechRecognizer, "initialize", initialize, 1);
|
97
|
+
rb_define_method(rb_cSpeechRecognizer, "recognize", recognize, 1);
|
98
|
+
rb_eUtteranceError = rb_define_class_under(rb_mPihsi, "UtteranceError", rb_eStandardError);
|
103
99
|
}
|
data/lib/pihsi/version.rb
CHANGED
data/lib/pihsi.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pihsi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zhi-Qiang Lei
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,7 +112,6 @@ files:
|
|
112
112
|
- ext/pocket_sphinx/extconf.rb
|
113
113
|
- ext/pocket_sphinx/pocket_sphinx.c
|
114
114
|
- lib/pihsi.rb
|
115
|
-
- lib/pihsi/speech_recognizer.rb
|
116
115
|
- lib/pihsi/version.rb
|
117
116
|
- pihsi.gemspec
|
118
117
|
- spec/factories/speech_recognizer.rb
|
data/lib/pihsi/speech_recognizer.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require "pocket_sphinx/pocket_sphinx"
|
2
|
-
|
3
|
-
module Pihsi
|
4
|
-
class SpeechRecognizer
|
5
|
-
attr_reader :decoder
|
6
|
-
|
7
|
-
def initialize(options = {})
|
8
|
-
_options = options.inject([]) do |result, (key, value)|
|
9
|
-
result << ["-#{key}", value] unless value.nil?
|
10
|
-
result
|
11
|
-
end
|
12
|
-
@decoder = PocketSphinx::Decoder.new(_options)
|
13
|
-
end
|
14
|
-
|
15
|
-
# Converts raw audio data into text
|
16
|
-
# @param data [String] the raw audio data
|
17
|
-
# @return [String, nil] the transcribed text or nil
|
18
|
-
def recognize(data)
|
19
|
-
if result = decoder.decode(data.unpack('s*'))
|
20
|
-
result["hypothesis"]
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|