pihsi 0.0.8 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 650af7f9bd97d82f5d9163c4572835755f7175bb
4
- data.tar.gz: c1e853d14cd7ce5110b332357041a17cc839c0fc
3
+ metadata.gz: d949dd5f7e6d5f382c6c74a44f3678fbfe0a8fa8
4
+ data.tar.gz: ed8b316d1c8ffb11f954a57a0389d787079acd70
5
5
  SHA512:
6
- metadata.gz: f0ce6a10c92c009980f9500a3cb6bdca5984e3671b433ae2862d59532acf94c575b05e4c1241bb2268291fe4a99a77db52d3d56e09a99e78cdda601ca2e8c2d8
7
- data.tar.gz: 00df6ac4921be463b5bef64df63c18d563d4f6bc87d6162704b689aae22161fb3ede23895366e9067604b3e1e7ded91e39da23cfcb1c7ae449f4a7e95c771c0a
6
+ metadata.gz: 5b09454ee9df43e8aac32714dcaf0a9c11fa7a56f5ace89730cd817fdba4115a4cbca1199ce4ef04e30f3f82ae6727bb7ebf037be84a0d02a6fb3c8ec0a2c7b9
7
+ data.tar.gz: 31ccc59f39331163b8fbeb290e7ee2db0e2ded8e40f9b77a5046610cbbdb5f79d34d40b5cf3fdc660aca5842699896d6e43a2b7ba48db23cb9db293b6174a694
data/README.md CHANGED
@@ -29,7 +29,7 @@ Or install it yourself as:
29
29
  Initialize Pihsi::SpeechRecognizer with proper [hmm, lm and dict](http://cmusphinx.sourceforge.net/wiki/tutorialpocketsphinx#initialization):
30
30
 
31
31
  ```ruby
32
- recognizer = Pihsi::SpeechRecognizer.new
32
+ recognizer = Pihsi::SpeechRecognizer.new hmm: 'xxx', lm: 'yyy', dict: 'zzz'
33
33
  ```
34
34
 
35
35
  Recognize a string read from your audio file:
@@ -5,50 +5,40 @@ typedef struct ps {
5
5
  ps_decoder_t *decoder;
6
6
  } PocketSphinx;
7
7
 
8
- VALUE decode(VALUE self, VALUE data) {
8
+ static VALUE rb_eUtteranceError;
9
+
10
+ /* Converts raw audio data into text.
11
+ *
12
+ * @param data [String] the raw audio data
13
+ * @return [String, nil] the transcribed text or nil
14
+ */
15
+ VALUE recognize(VALUE self, VALUE data) {
9
16
  char const *hyp, *uttid;
10
17
  int rv;
11
18
  int32 score;
12
19
  ps_decoder_t *ps;
13
20
  PocketSphinx *pocketSphinx;
14
- VALUE result = rb_hash_new();
15
21
 
16
22
  Data_Get_Struct(self, PocketSphinx, pocketSphinx);
17
23
  ps = pocketSphinx -> decoder;
18
24
 
19
- int data_length = RARRAY_LEN(data);
20
- int16 *c_data = malloc(sizeof(int16) * data_length);
21
-
22
- int i;
23
- for(i = 0; i < data_length; i++) {
24
- c_data[i] = NUM2INT(rb_ary_entry(data, i)); // Rubinius does not support NUM2SHORT();
25
- }
26
-
27
25
  rv = ps_start_utt(ps, "goforward");
28
26
 
29
- if (rv < 0) {
30
- free(c_data);
31
- rb_raise(rb_eStandardError, "cannot start utterance");
32
- }
27
+ if (rv < 0)
28
+ rb_raise(rb_eUtteranceError, "cannot start utterance");
33
29
 
34
- rv = ps_process_raw(ps, c_data, data_length, FALSE, FALSE);
30
+ rv = ps_process_raw(ps, (int16 *) RSTRING_PTR(data), RSTRING_LEN(data) / 2, FALSE, FALSE);
35
31
  rv = ps_end_utt(ps);
36
32
 
37
- if (rv < 0) {
38
- free(c_data);
39
- rb_raise(rb_eStandardError, "cannot end utterance");
40
- }
33
+ if (rv < 0)
34
+ rb_raise(rb_eUtteranceError, "cannot end utterance");
41
35
 
42
36
  hyp = ps_get_hyp(ps, &score, &uttid);
43
37
 
44
- free(c_data);
45
-
46
38
  if (hyp == NULL) {
47
39
  return Qnil;
48
40
  } else {
49
- rb_hash_aset(result, rb_str_new2("score"), INT2NUM(hyp));
50
- rb_hash_aset(result, rb_str_new2("hypothesis"), rb_str_new2(hyp));
51
- return result;
41
+ return rb_str_new2(hyp);
52
42
  }
53
43
  }
54
44
 
@@ -63,16 +53,22 @@ static VALUE allocate(VALUE self) {
63
53
  return Data_Make_Struct(self, PocketSphinx, 0, deallocate, ps);
64
54
  }
65
55
 
66
- VALUE initialize(VALUE self, VALUE options) {
56
+ /*
57
+ * @param options [Hash] PocketSphinx configuration (e.g. hmm)
58
+ */
59
+ VALUE initialize(VALUE self, VALUE options_hash) {
67
60
  int i;
68
61
  PocketSphinx *ps;
69
62
  VALUE option, key, value;
70
63
  char *c_key;
71
64
  cmd_ln_t *config = cmd_ln_init(NULL, ps_args(), TRUE, "-logfn", "/dev/null", NULL);
72
65
 
66
+ VALUE options = rb_funcall(options_hash, rb_intern("to_a"), 0);
67
+
73
68
  for (i = 0; i < RARRAY_LEN(options); i++) {
74
69
  option = rb_ary_entry(options, i);
75
- key = rb_ary_entry(option, 0);
70
+ key = rb_funcall(rb_ary_entry(option, 0), rb_intern("to_s"), 0);
71
+ key = rb_funcall(key, rb_intern("prepend"), 1, rb_str_new2("-"));
76
72
  c_key = RSTRING_PTR(key);
77
73
  value = rb_ary_entry(option, 1);
78
74
 
@@ -95,9 +91,9 @@ VALUE initialize(VALUE self, VALUE options) {
95
91
 
96
92
  void Init_pocket_sphinx() {
97
93
  VALUE rb_mPihsi = rb_define_module("Pihsi");
98
- VALUE rb_mPocketSphinx = rb_define_module_under(rb_mPihsi, "PocketSphinx");
99
- VALUE rb_cDecoder = rb_define_class_under(rb_mPocketSphinx, "Decoder", rb_cObject);
100
- rb_define_alloc_func(rb_cDecoder, allocate);
101
- rb_define_method(rb_cDecoder, "initialize", initialize, 1);
102
- rb_define_method(rb_cDecoder, "decode", decode, 1);
94
+ VALUE rb_cSpeechRecognizer = rb_define_class_under(rb_mPihsi, "SpeechRecognizer", rb_cObject);
95
+ rb_define_alloc_func(rb_cSpeechRecognizer, allocate);
96
+ rb_define_method(rb_cSpeechRecognizer, "initialize", initialize, 1);
97
+ rb_define_method(rb_cSpeechRecognizer, "recognize", recognize, 1);
98
+ rb_eUtteranceError = rb_define_class_under(rb_mPihsi, "UtteranceError", rb_eStandardError);
103
99
  }
data/lib/pihsi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pihsi
2
- VERSION = "0.0.8"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/pihsi.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'bundler/setup'
3
3
  require "pihsi/version"
4
- require "pihsi/speech_recognizer"
4
+ require "pocket_sphinx/pocket_sphinx"
5
5
 
6
6
  module Pihsi
7
7
  # Your code goes here...
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pihsi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zhi-Qiang Lei
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-04 00:00:00.000000000 Z
11
+ date: 2014-09-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -112,7 +112,6 @@ files:
112
112
  - ext/pocket_sphinx/extconf.rb
113
113
  - ext/pocket_sphinx/pocket_sphinx.c
114
114
  - lib/pihsi.rb
115
- - lib/pihsi/speech_recognizer.rb
116
115
  - lib/pihsi/version.rb
117
116
  - pihsi.gemspec
118
117
  - spec/factories/speech_recognizer.rb
@@ -1,24 +0,0 @@
1
- require "pocket_sphinx/pocket_sphinx"
2
-
3
- module Pihsi
4
- class SpeechRecognizer
5
- attr_reader :decoder
6
-
7
- def initialize(options = {})
8
- _options = options.inject([]) do |result, (key, value)|
9
- result << ["-#{key}", value] unless value.nil?
10
- result
11
- end
12
- @decoder = PocketSphinx::Decoder.new(_options)
13
- end
14
-
15
- # Converts raw audio data into text
16
- # @param data [String] the raw audio data
17
- # @return [String, nil] the transcribed text or nil
18
- def recognize(data)
19
- if result = decoder.decode(data.unpack('s*'))
20
- result["hypothesis"]
21
- end
22
- end
23
- end
24
- end