pihsi 0.0.8 → 0.1.0
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/README.md +1 -1
 - data/ext/pocket_sphinx/pocket_sphinx.c +27 -31
 - data/lib/pihsi/version.rb +1 -1
 - data/lib/pihsi.rb +1 -1
 - metadata +2 -3
 - data/lib/pihsi/speech_recognizer.rb +0 -24
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: d949dd5f7e6d5f382c6c74a44f3678fbfe0a8fa8
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: ed8b316d1c8ffb11f954a57a0389d787079acd70
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 5b09454ee9df43e8aac32714dcaf0a9c11fa7a56f5ace89730cd817fdba4115a4cbca1199ce4ef04e30f3f82ae6727bb7ebf037be84a0d02a6fb3c8ec0a2c7b9
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 31ccc59f39331163b8fbeb290e7ee2db0e2ded8e40f9b77a5046610cbbdb5f79d34d40b5cf3fdc660aca5842699896d6e43a2b7ba48db23cb9db293b6174a694
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -29,7 +29,7 @@ Or install it yourself as: 
     | 
|
| 
       29 
29 
     | 
    
         
             
            Initialize Pihsi::SpeechRecognizer with proper [hmm, lm and dict](http://cmusphinx.sourceforge.net/wiki/tutorialpocketsphinx#initialization):
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
       31 
31 
     | 
    
         
             
            ```ruby
         
     | 
| 
       32 
     | 
    
         
            -
            recognizer = Pihsi::SpeechRecognizer.new
         
     | 
| 
      
 32 
     | 
    
         
            +
            recognizer = Pihsi::SpeechRecognizer.new hmm: 'xxx', lm: 'yyy', dict: 'zzz'
         
     | 
| 
       33 
33 
     | 
    
         
             
            ```
         
     | 
| 
       34 
34 
     | 
    
         | 
| 
       35 
35 
     | 
    
         
             
            Recognize a string read from your audio file:
         
     | 
| 
         @@ -5,50 +5,40 @@ typedef struct ps { 
     | 
|
| 
       5 
5 
     | 
    
         
             
                ps_decoder_t *decoder;
         
     | 
| 
       6 
6 
     | 
    
         
             
            } PocketSphinx;
         
     | 
| 
       7 
7 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
      
 8 
     | 
    
         
            +
            static VALUE rb_eUtteranceError;
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            /* Converts raw audio data into text.
         
     | 
| 
      
 11 
     | 
    
         
            +
             *
         
     | 
| 
      
 12 
     | 
    
         
            +
             * @param data [String] the raw audio data
         
     | 
| 
      
 13 
     | 
    
         
            +
             * @return [String, nil] the transcribed text or nil
         
     | 
| 
      
 14 
     | 
    
         
            +
             */
         
     | 
| 
      
 15 
     | 
    
         
            +
            VALUE recognize(VALUE self, VALUE data) {
         
     | 
| 
       9 
16 
     | 
    
         
             
                char const *hyp, *uttid;
         
     | 
| 
       10 
17 
     | 
    
         
             
                int rv;
         
     | 
| 
       11 
18 
     | 
    
         
             
                int32 score;
         
     | 
| 
       12 
19 
     | 
    
         
             
                ps_decoder_t *ps;
         
     | 
| 
       13 
20 
     | 
    
         
             
                PocketSphinx *pocketSphinx;
         
     | 
| 
       14 
     | 
    
         
            -
                VALUE result = rb_hash_new();
         
     | 
| 
       15 
21 
     | 
    
         | 
| 
       16 
22 
     | 
    
         
             
                Data_Get_Struct(self, PocketSphinx, pocketSphinx);
         
     | 
| 
       17 
23 
     | 
    
         
             
                ps = pocketSphinx -> decoder;
         
     | 
| 
       18 
24 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
                int data_length = RARRAY_LEN(data);
         
     | 
| 
       20 
     | 
    
         
            -
                int16 *c_data = malloc(sizeof(int16) * data_length);
         
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
                int i;
         
     | 
| 
       23 
     | 
    
         
            -
                for(i = 0; i < data_length; i++) {
         
     | 
| 
       24 
     | 
    
         
            -
                    c_data[i] = NUM2INT(rb_ary_entry(data, i)); // Rubinius does not support NUM2SHORT();
         
     | 
| 
       25 
     | 
    
         
            -
                }
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
25 
     | 
    
         
             
                rv = ps_start_utt(ps, "goforward");
         
     | 
| 
       28 
26 
     | 
    
         | 
| 
       29 
     | 
    
         
            -
                if (rv < 0) 
     | 
| 
       30 
     | 
    
         
            -
                     
     | 
| 
       31 
     | 
    
         
            -
                    rb_raise(rb_eStandardError, "cannot start utterance");
         
     | 
| 
       32 
     | 
    
         
            -
                }
         
     | 
| 
      
 27 
     | 
    
         
            +
                if (rv < 0)
         
     | 
| 
      
 28 
     | 
    
         
            +
                    rb_raise(rb_eUtteranceError, "cannot start utterance");
         
     | 
| 
       33 
29 
     | 
    
         | 
| 
       34 
     | 
    
         
            -
                rv = ps_process_raw(ps,  
     | 
| 
      
 30 
     | 
    
         
            +
                rv = ps_process_raw(ps, (int16 *) RSTRING_PTR(data), RSTRING_LEN(data) / 2, FALSE, FALSE);
         
     | 
| 
       35 
31 
     | 
    
         
             
                rv = ps_end_utt(ps);
         
     | 
| 
       36 
32 
     | 
    
         | 
| 
       37 
     | 
    
         
            -
                if (rv < 0) 
     | 
| 
       38 
     | 
    
         
            -
                     
     | 
| 
       39 
     | 
    
         
            -
                    rb_raise(rb_eStandardError, "cannot end utterance");
         
     | 
| 
       40 
     | 
    
         
            -
                }
         
     | 
| 
      
 33 
     | 
    
         
            +
                if (rv < 0)
         
     | 
| 
      
 34 
     | 
    
         
            +
                    rb_raise(rb_eUtteranceError, "cannot end utterance");
         
     | 
| 
       41 
35 
     | 
    
         | 
| 
       42 
36 
     | 
    
         
             
                hyp = ps_get_hyp(ps, &score, &uttid);
         
     | 
| 
       43 
37 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
                free(c_data);
         
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
38 
     | 
    
         
             
                if (hyp == NULL) {
         
     | 
| 
       47 
39 
     | 
    
         
             
                    return Qnil;
         
     | 
| 
       48 
40 
     | 
    
         
             
                } else {
         
     | 
| 
       49 
     | 
    
         
            -
                     
     | 
| 
       50 
     | 
    
         
            -
                    rb_hash_aset(result, rb_str_new2("hypothesis"), rb_str_new2(hyp));
         
     | 
| 
       51 
     | 
    
         
            -
                    return result;
         
     | 
| 
      
 41 
     | 
    
         
            +
                    return rb_str_new2(hyp);
         
     | 
| 
       52 
42 
     | 
    
         
             
                }
         
     | 
| 
       53 
43 
     | 
    
         
             
            }
         
     | 
| 
       54 
44 
     | 
    
         | 
| 
         @@ -63,16 +53,22 @@ static VALUE allocate(VALUE self) { 
     | 
|
| 
       63 
53 
     | 
    
         
             
                return Data_Make_Struct(self, PocketSphinx, 0, deallocate, ps);
         
     | 
| 
       64 
54 
     | 
    
         
             
            }
         
     | 
| 
       65 
55 
     | 
    
         | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
      
 56 
     | 
    
         
            +
            /*
         
     | 
| 
      
 57 
     | 
    
         
            +
             * @param options [Hash] PocketSphinx configuration (e.g. hmm)
         
     | 
| 
      
 58 
     | 
    
         
            +
             */
         
     | 
| 
      
 59 
     | 
    
         
            +
            VALUE initialize(VALUE self, VALUE options_hash) {
         
     | 
| 
       67 
60 
     | 
    
         
             
                int i;
         
     | 
| 
       68 
61 
     | 
    
         
             
                PocketSphinx *ps;
         
     | 
| 
       69 
62 
     | 
    
         
             
                VALUE option, key, value;
         
     | 
| 
       70 
63 
     | 
    
         
             
                char *c_key;
         
     | 
| 
       71 
64 
     | 
    
         
             
                cmd_ln_t *config = cmd_ln_init(NULL, ps_args(), TRUE, "-logfn", "/dev/null", NULL);
         
     | 
| 
       72 
65 
     | 
    
         | 
| 
      
 66 
     | 
    
         
            +
                VALUE options = rb_funcall(options_hash, rb_intern("to_a"), 0);
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
       73 
68 
     | 
    
         
             
                for (i = 0; i < RARRAY_LEN(options); i++) {
         
     | 
| 
       74 
69 
     | 
    
         
             
                    option = rb_ary_entry(options, i);
         
     | 
| 
       75 
     | 
    
         
            -
                    key = rb_ary_entry(option, 0);
         
     | 
| 
      
 70 
     | 
    
         
            +
                    key = rb_funcall(rb_ary_entry(option, 0), rb_intern("to_s"), 0);
         
     | 
| 
      
 71 
     | 
    
         
            +
                    key = rb_funcall(key, rb_intern("prepend"), 1, rb_str_new2("-"));
         
     | 
| 
       76 
72 
     | 
    
         
             
                    c_key = RSTRING_PTR(key);
         
     | 
| 
       77 
73 
     | 
    
         
             
                    value = rb_ary_entry(option, 1);
         
     | 
| 
       78 
74 
     | 
    
         | 
| 
         @@ -95,9 +91,9 @@ VALUE initialize(VALUE self, VALUE options) { 
     | 
|
| 
       95 
91 
     | 
    
         | 
| 
       96 
92 
     | 
    
         
             
            void Init_pocket_sphinx() {
         
     | 
| 
       97 
93 
     | 
    
         
             
                VALUE rb_mPihsi = rb_define_module("Pihsi");
         
     | 
| 
       98 
     | 
    
         
            -
                VALUE  
     | 
| 
       99 
     | 
    
         
            -
                 
     | 
| 
       100 
     | 
    
         
            -
                 
     | 
| 
       101 
     | 
    
         
            -
                rb_define_method( 
     | 
| 
       102 
     | 
    
         
            -
                 
     | 
| 
      
 94 
     | 
    
         
            +
                VALUE rb_cSpeechRecognizer = rb_define_class_under(rb_mPihsi, "SpeechRecognizer", rb_cObject);
         
     | 
| 
      
 95 
     | 
    
         
            +
                rb_define_alloc_func(rb_cSpeechRecognizer, allocate);
         
     | 
| 
      
 96 
     | 
    
         
            +
                rb_define_method(rb_cSpeechRecognizer, "initialize", initialize, 1);
         
     | 
| 
      
 97 
     | 
    
         
            +
                rb_define_method(rb_cSpeechRecognizer, "recognize", recognize, 1);
         
     | 
| 
      
 98 
     | 
    
         
            +
                rb_eUtteranceError = rb_define_class_under(rb_mPihsi, "UtteranceError", rb_eStandardError);
         
     | 
| 
       103 
99 
     | 
    
         
             
            }
         
     | 
    
        data/lib/pihsi/version.rb
    CHANGED
    
    
    
        data/lib/pihsi.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: pihsi
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Zhi-Qiang Lei
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2014-09- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2014-09-07 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: bundler
         
     | 
| 
         @@ -112,7 +112,6 @@ files: 
     | 
|
| 
       112 
112 
     | 
    
         
             
            - ext/pocket_sphinx/extconf.rb
         
     | 
| 
       113 
113 
     | 
    
         
             
            - ext/pocket_sphinx/pocket_sphinx.c
         
     | 
| 
       114 
114 
     | 
    
         
             
            - lib/pihsi.rb
         
     | 
| 
       115 
     | 
    
         
            -
            - lib/pihsi/speech_recognizer.rb
         
     | 
| 
       116 
115 
     | 
    
         
             
            - lib/pihsi/version.rb
         
     | 
| 
       117 
116 
     | 
    
         
             
            - pihsi.gemspec
         
     | 
| 
       118 
117 
     | 
    
         
             
            - spec/factories/speech_recognizer.rb
         
     | 
| 
         @@ -1,24 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require "pocket_sphinx/pocket_sphinx"
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            module Pihsi
         
     | 
| 
       4 
     | 
    
         
            -
              class SpeechRecognizer
         
     | 
| 
       5 
     | 
    
         
            -
                attr_reader :decoder
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                def initialize(options = {})
         
     | 
| 
       8 
     | 
    
         
            -
                  _options = options.inject([]) do |result, (key, value)|
         
     | 
| 
       9 
     | 
    
         
            -
                    result << ["-#{key}", value] unless value.nil?
         
     | 
| 
       10 
     | 
    
         
            -
                    result
         
     | 
| 
       11 
     | 
    
         
            -
                  end
         
     | 
| 
       12 
     | 
    
         
            -
                  @decoder = PocketSphinx::Decoder.new(_options)
         
     | 
| 
       13 
     | 
    
         
            -
                end
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
                # Converts raw audio data into text
         
     | 
| 
       16 
     | 
    
         
            -
                # @param data [String] the raw audio data
         
     | 
| 
       17 
     | 
    
         
            -
                # @return [String, nil] the transcribed text or nil
         
     | 
| 
       18 
     | 
    
         
            -
                def recognize(data)
         
     | 
| 
       19 
     | 
    
         
            -
                  if result = decoder.decode(data.unpack('s*'))
         
     | 
| 
       20 
     | 
    
         
            -
                    result["hypothesis"]
         
     | 
| 
       21 
     | 
    
         
            -
                  end
         
     | 
| 
       22 
     | 
    
         
            -
                end
         
     | 
| 
       23 
     | 
    
         
            -
              end
         
     | 
| 
       24 
     | 
    
         
            -
            end
         
     |