RubyGems - flite - Versions diffs - 0.0.3.1-x86-mingw32 → 0.1.0-x86-mingw32 - Mend

flite 0.0.3.1-x86-mingw32 → 0.1.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +4 -4
data/README.md +97 -72
data/bin/{saytime.rb → saytime} +19 -1
data/bin/speaking-web-server +121 -0
data/ext/flite/extconf.rb +40 -3
data/ext/flite/rbflite.c +604 -83
data/ext/flite/rbflite.h +10 -0
data/ext/flite/win32_binary_gem.c +432 -0
data/flite.gemspec +7 -3
data/lib/flite.dll +0 -0
data/lib/flite.rb +45 -2
data/lib/flite/version.rb +1 -1
data/lib/flite_200.so +0 -0
data/lib/flite_210.so +0 -0
data/lib/flite_cmu_grapheme_lang.dll +0 -0
data/lib/flite_cmu_grapheme_lex.dll +0 -0
data/lib/flite_cmu_indic_lang.dll +0 -0
data/lib/flite_cmu_indic_lex.dll +0 -0
data/lib/flite_cmu_time_awb.dll +0 -0
data/lib/flite_cmu_us_awb.dll +0 -0
data/lib/flite_cmu_us_kal.dll +0 -0
data/lib/flite_cmu_us_kal16.dll +0 -0
data/lib/flite_cmu_us_rms.dll +0 -0
data/lib/flite_cmu_us_slt.dll +0 -0
data/lib/flite_cmulex.dll +0 -0
data/lib/flite_usenglish.dll +0 -0
data/lib/libmp3lame-0.dll +0 -0
metadata +23 -5

data/ext/flite/rbflite.c CHANGED

@@ -38,6 +38,13 @@
 #include "rbflite.h"
 #include <flite/flite_version.h>
+#ifndef MIN
+#define MIN(a, b) ((a) < (b)) ? (a) : (b)
+#endif
+#ifndef MAX
+#define MAX(a, b) ((a) > (b)) ? (a) : (b)
+#endif
 #ifdef WORDS_BIGENDIAN
 #define TO_LE4(num)  SWAPINT(num)
 #defien TO_LE2(num)  SWAPSHORT(num)
@@ -46,6 +53,16 @@
 #define TO_LE2(num)  (num)
 #endif
+#ifdef HAVE_LAME_LAME_H
+#include <lame/lame.h>
+#define HAVE_MP3LAME 1
+#endif
+#ifdef HAVE_LAME_H
+#include <lame.h>
+#define HAVE_MP3LAME 1
+#endif
 #ifdef HAVE_CST_AUDIO_STREAMING_INFO_UTT
 /* flite 2.0.0 */
 typedef struct cst_audio_streaming_info_struct *asc_last_arg_t;
@@ -56,6 +73,14 @@ typedef void *asc_last_arg_t;
 #define ASC_LAST_ARG_TO_USERDATA(last_arg) (last_arg)
 #endif
+enum rbfile_error {
+    RBFLITE_ERROR_SUCCESS,
+    RBFLITE_ERROR_OUT_OF_MEMORY,
+    RBFLITE_ERROR_LAME_INIT_PARAMS,
+    RBFLITE_ERROR_LAME_ENCODE_BUFFER,
+    RBFLITE_ERROR_LAME_ENCODE_FLUSH,
+};
 void usenglish_init(cst_voice *v);
 cst_lexicon *cmulex_init(void);
@@ -65,39 +90,166 @@ cst_lexicon *cmu_indic_lex_init(void);
 void cmu_grapheme_lang_init(cst_voice *v);
 cst_lexicon *cmu_grapheme_lex_init(void);
+typedef struct thread_queue_entry {
+    struct thread_queue_entry *next;
+    VALUE thread;
+} thread_queue_entry_t;
+typedef struct {
+    thread_queue_entry_t *head;
+    thread_queue_entry_t **tail;
+} thread_queue_t;
 typedef struct {
     cst_voice *voice;
+    thread_queue_t queue;
 } rbflite_voice_t;
+#define MIN_BUFFER_LIST_SIZE (64 * 1024)
+typedef struct buffer_list {
+    struct buffer_list *next;
+    size_t size;
+    size_t used;
+    char buf[1];
+} buffer_list_t;
 typedef struct {
     cst_voice *voice;
     const char *text;
     const char *outtype;
-    VALUE io;
-    int state;
-} voice_speech_arg_t;
+    void *encoder;
+    buffer_list_t *buffer_list;
+    buffer_list_t *buffer_list_last;
+    enum rbfile_error error;
+} voice_speech_data_t;
 typedef struct {
-    VALUE io;
-    void *data;
-    long size;
-} io_write_arg_t;
+    cst_audio_stream_callback asc;
+    void *(*encoder_init)(VALUE opts);
+    void (*encoder_fini)(void *encoder);
+} audio_stream_encoder_t;
 static VALUE rb_mFlite;
+static VALUE rb_eFliteError;
+static VALUE rb_eFliteRuntimeError;
 static VALUE rb_cVoice;
-static ID id_write;
+static VALUE sym_mp3;
+static VALUE sym_raw;
+static VALUE sym_wav;
+static struct timeval sleep_time_after_speaking;
+static buffer_list_t *buffer_list_alloc(size_t size);
+static void check_error(voice_speech_data_t *vsd);
+static void lock_thread(thread_queue_t *queue, thread_queue_entry_t *entry)
+{
+    /* enqueue the current thread to voice->queue. */
+    entry->next = NULL;
+    *queue->tail = entry;
+    queue->tail = &entry->next;
+    if (queue->head != entry) {
+        /* stop the current thread if other threads run. */
+        entry->thread = rb_thread_current();
+        rb_thread_stop();
+    }
+}
+static void unlock_thread(thread_queue_t *queue)
+{
+    /* dequeue the current thread from voice->queue. */
+    queue->head = queue->head->next;
+    if (queue->head == NULL) {
+        queue->tail = &queue->head;
+    } else {
+        /* resume the top of blocked threads. */
+        rb_thread_wakeup_alive(queue->head->thread);
+    }
+}
+static int add_data(voice_speech_data_t *vsd, const void *data, size_t size)
+{
+    buffer_list_t *list;
+    size_t rest;
+    if (vsd->buffer_list == NULL) {
+        list = buffer_list_alloc(size);
+        if (list == NULL) {
+            vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
+            return -1;
+        }
+        vsd->buffer_list = vsd->buffer_list_last = list;
+    }
+    list = vsd->buffer_list_last;
+    rest = list->size - list->used;
+    if (size <= rest) {
+        memcpy(list->buf + list->used, data, size);
+        list->used += size;
+    } else {
+        memcpy(list->buf + list->used, data, rest);
+        list->used += rest;
+        data = (const char*)data + rest;
+        size -= rest;
+        list = buffer_list_alloc(size);
+        if (list == NULL) {
+            vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
+            return -1;
+        }
+        memcpy(list->buf, data, size);
+        list->used = size;
+        vsd->buffer_list_last->next = list;
+        vsd->buffer_list_last = list;
+    }
+    return 0;
+}
+static buffer_list_t *buffer_list_alloc(size_t size)
+{
+    size_t alloc_size = MAX(size + offsetof(buffer_list_t, buf), MIN_BUFFER_LIST_SIZE);
+    buffer_list_t *list = xmalloc(alloc_size);
+    if (list == NULL) {
+        return NULL;
+    }
+    list->next = NULL;
+    list->size = alloc_size - offsetof(buffer_list_t, buf);
+    list->used = 0;
+    return list;
+}
+static void check_error(voice_speech_data_t *vsd)
+{
+    buffer_list_t *list, *list_next;
+    if (vsd->error == RBFLITE_ERROR_SUCCESS) {
+        return;
+    }
+    for (list = vsd->buffer_list; list != NULL; list = list_next) {
+        list_next = list->next;
+        xfree(list);
+    }
+    vsd->buffer_list = NULL;
+    switch (vsd->error) {
+    case RBFLITE_ERROR_OUT_OF_MEMORY:
+        rb_raise(rb_eNoMemError, "out of memory while writing speech data");
+    case RBFLITE_ERROR_LAME_INIT_PARAMS:
+        rb_raise(rb_eFliteRuntimeError, "lame_init_params() error");
+    case RBFLITE_ERROR_LAME_ENCODE_BUFFER:
+        rb_raise(rb_eFliteRuntimeError, "lame_encode_buffer() error");
+    case RBFLITE_ERROR_LAME_ENCODE_FLUSH:
+        rb_raise(rb_eFliteRuntimeError, "lame_encode_flush() error");
+    default:
+        rb_raise(rb_eFliteRuntimeError, "Unkown error %d", vsd->error);
+    }
+}
 /*
- * call graph:
+ *  Returns builtin voice names.
  *
- *  rbflite_audio_write_cb()
- *    --> rbfile_io_write_protect() via rb_thread_call_with_gvl()
- *     --> rbfile_io_write() via rb_protect()
+ *  @example
+ *    Flite.list_builtin_voices # => ["kal", "awb_time", "kal16", "awb", "rms", "slt"]
+ *
+ *  @return [Array]
  */
-static int rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg);
-static void *rbfile_io_write_protect(void *data);
-static VALUE rbfile_io_write(VALUE data);
 static VALUE
 flite_s_list_builtin_voices(VALUE klass)
 {
@@ -112,6 +264,53 @@ flite_s_list_builtin_voices(VALUE klass)
     return ary;
 }
+/*
+ *  Returns supported audio types used as the second argument of {Flite::Voice#to_speech}.
+ *
+ *  @example
+ *    # Compiled with mp3 support
+ *    Flite.supported_audio_types # => [:wav, :raw, :mp3]
+ *
+ *    # Compiled without mp3 support
+ *    Flite.supported_audio_types # => [:wav, :raw]
+ *
+ *  @return [Array]
+ */
+static VALUE
+flite_s_supported_audio_types(VALUE klass)
+{
+    VALUE ary = rb_ary_new();
+    rb_ary_push(ary, sym_wav);
+    rb_ary_push(ary, sym_raw);
+#ifdef HAVE_MP3LAME
+    rb_ary_push(ary, sym_mp3);
+#endif
+    return ary;
+}
+/*
+ * @overload sleep_time_after_speaking=(sec)
+ *
+ *  Sets sleep time after {Flite::Voice#speak}.
+ *  The default value is 0 on Unix and 0.3 on Windows.
+ *
+ *  This is workaround for voice cutoff on Windows.
+ *  The following code speaks "Hello Wor.. Hello World" without
+ *  0.3 seconds sleep.
+ *
+ *      "Hello World".speak # The last 0.3 seconds are cut off by the next speech on Windows.
+ *      "Hello World".speak
+ *
+ *  @param [Float] sec seconds to sleep
+ */
+static VALUE
+flite_s_set_sleep_time_after_speaking(VALUE klass, VALUE val)
+{
+    sleep_time_after_speaking = rb_time_interval(val);
+    return val;
+}
 static void
 rbfile_voice_free(rbflite_voice_t *voice)
 {
@@ -125,8 +324,10 @@ static VALUE
 rbflite_voice_s_allocate(VALUE klass)
 {
     rbflite_voice_t *voice;
+    VALUE obj = Data_Make_Struct(klass, rbflite_voice_t, NULL, rbfile_voice_free, voice);
-    return Data_Make_Struct(klass, rbflite_voice_t, NULL, rbfile_voice_free, voice);
+    voice->queue.tail = &voice->queue.head;
+    return obj;
 }
 #ifdef HAVE_FLITE_VOICE_LOAD
@@ -137,6 +338,28 @@ rbflite_voice_load(void *data)
 }
 #endif
+/*
+ * @overload initialize(name = nil)
+ *
+ *  Create a new voice specified by <code>name</code>.
+ *  If <code>name</code> includes '.' or '/' and ruby flite
+ *  is compiled for CMU Flite 2.0.0 or upper, try to
+ *  use a loadable voice.
+ *
+ *  @example
+ *
+ *    # Use default voice. It is 'kal' usually.
+ *    voice = Flite::Voice.new
+ *
+ *    # Use a builtin voice.
+ *    voice = Flite::Voice.new('awb')
+ *
+ *    # Use a lodable voice.
+ *    voice = Flite::Voice.new('/path/to/cmu_us_gka.flitevox')
+ *
+ *  @param [String] name
+ *  @see Flite.list_builtin_voices
+ */
 static VALUE
 rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
 {
@@ -174,18 +397,15 @@ rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
 static void *
 voice_speech_without_gvl(void *data)
 {
-    voice_speech_arg_t *arg = (voice_speech_arg_t *)data;
-    flite_text_to_speech(arg->text, arg->voice, arg->outtype);
+    voice_speech_data_t *vsd = (voice_speech_data_t *)data;
+    flite_text_to_speech(vsd->text, vsd->voice, vsd->outtype);
     return NULL;
 }
 static int
-rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
+wav_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
 {
-    voice_speech_arg_t *ud = (voice_speech_arg_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
-    io_write_arg_t arg;
-    arg.io = ud->io;
+    voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
     if (start == 0) {
         /* write WAVE file header. */
@@ -227,103 +447,363 @@ rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_las
         header.bitswidth = TO_LE2(sizeof(short) * 8);
         header.data_size = TO_LE4(data_size);
-        arg.data = &header;
-        arg.size = sizeof(header);
-        ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
-        if (ud->state != 0) {
+        if (add_data(vsd, &header, sizeof(header)) != 0) {
             return CST_AUDIO_STREAM_STOP;
         }
     }
-    arg.data = &w->samples[start];
-    arg.size = size * sizeof(short);
-    ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
-    if (ud->state != 0) {
+    if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) {
         return CST_AUDIO_STREAM_STOP;
     }
+    return CST_AUDIO_STREAM_CONT;
+}
+static audio_stream_encoder_t wav_encoder = {
+    wav_encoder_cb,
+    NULL,
+    NULL,
+};
+static int
+raw_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
+{
+    voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
+    if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) {
+        return CST_AUDIO_STREAM_STOP;
+    }
     return CST_AUDIO_STREAM_CONT;
 }
-static void *
-rbfile_io_write_protect(void *data)
+static audio_stream_encoder_t raw_encoder = {
+    raw_encoder_cb,
+    NULL,
+    NULL,
+};
+#ifdef HAVE_MP3LAME
+#define MAX_SAMPLE_SIZE 1024
+/* "mp3buf_size in bytes = 1.25*num_samples + 7200" according to lame.h. */
+#define MP3BUF_SIZE  (MAX_SAMPLE_SIZE + MAX_SAMPLE_SIZE / 4 + 7200)
+static int mp3_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
+{
+    voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
+    lame_global_flags *gf = vsd->encoder;
+    unsigned char mp3buf[MP3BUF_SIZE];
+    short *sptr = &w->samples[start];
+    short *eptr = sptr + size;
+    int rv;
+    if (start == 0) {
+        lame_set_num_samples(gf, cst_wave_num_samples(w));
+        lame_set_in_samplerate(gf, cst_wave_sample_rate(w));
+        lame_set_num_channels(gf, 1);
+        lame_set_mode(gf, MONO);
+        rv = lame_init_params(gf);
+        if (rv == -1) {
+            vsd->error = RBFLITE_ERROR_LAME_INIT_PARAMS;
+            return CST_AUDIO_STREAM_STOP;
+        }
+    }
+    while (eptr - sptr > MAX_SAMPLE_SIZE) {
+        rv = lame_encode_buffer(gf, sptr, NULL, MAX_SAMPLE_SIZE, mp3buf, sizeof(mp3buf));
+        if (rv < 0) {
+            vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
+            return CST_AUDIO_STREAM_STOP;
+        }
+        if (rv > 0) {
+            if (add_data(vsd, mp3buf, rv) != 0) {
+                return CST_AUDIO_STREAM_STOP;
+            }
+        }
+        sptr += MAX_SAMPLE_SIZE;
+    }
+    rv = lame_encode_buffer(gf, sptr, NULL, eptr - sptr, mp3buf, sizeof(mp3buf));
+    if (rv < 0) {
+        vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
+        return CST_AUDIO_STREAM_STOP;
+    }
+    if (rv > 0) {
+        if (add_data(vsd, mp3buf, rv) != 0) {
+            return CST_AUDIO_STREAM_STOP;
+        }
+    }
+    if (last) {
+        rv = lame_encode_flush(gf, mp3buf, sizeof(mp3buf));
+        if (rv < 0) {
+            vsd->error = RBFLITE_ERROR_LAME_ENCODE_FLUSH;
+            return CST_AUDIO_STREAM_STOP;
+        }
+        if (rv > 0) {
+            if (add_data(vsd, mp3buf, rv) != 0) {
+                return CST_AUDIO_STREAM_STOP;
+            }
+        }
+    }
+    return CST_AUDIO_STREAM_CONT;
+}
+static void *mp3_encoder_init(VALUE opts)
 {
-    int state = 0;
-    rb_protect(rbfile_io_write, (VALUE)data, &state);
-    return (void*)(VALUE)state;
+    lame_global_flags *gf = lame_init();
+    if (gf == NULL) {
+        rb_raise(rb_eFliteRuntimeError, "Failed to initialize lame");
+    }
+    lame_set_bWriteVbrTag(gf, 0);
+    lame_set_brate(gf, 64);
+    if (!NIL_P(opts)) {
+        VALUE v;
+        Check_Type(opts, T_HASH);
+        v = rb_hash_aref(opts, ID2SYM(rb_intern("bitrate")));
+        if (!NIL_P(v)) {
+            lame_set_brate(gf, NUM2INT(v));
+        }
+        v = rb_hash_aref(opts, ID2SYM(rb_intern("scale")));
+        if (!NIL_P(v)) {
+            lame_set_scale(gf, NUM2INT(v));
+        }
+        v = rb_hash_aref(opts, ID2SYM(rb_intern("quality")));
+        if (!NIL_P(v)) {
+            lame_set_quality(gf, NUM2INT(v));
+        }
+    }
+    lame_set_bWriteVbrTag(gf, 0);
+    return gf;
+}
+static void mp3_encoder_fini(void *encoder)
+{
+    lame_close(encoder);
 }
+static audio_stream_encoder_t mp3_encoder = {
+    mp3_encoder_cb,
+    mp3_encoder_init,
+    mp3_encoder_fini,
+};
+#endif
+/*
+ * @overload speak(text)
+ *
+ *  Speak the <code>text</code>.
+ *
+ *  @example
+ *    voice = Flite::Voice.new
+ *
+ *    # Speak 'Hello Flite World!'
+ *    voice.speak('Hello Flite World!')
+ *
+ *  @param [String] text
+ */
 static VALUE
-rbfile_io_write(VALUE data)
+rbflite_voice_speak(VALUE self, VALUE text)
 {
-    const io_write_arg_t *arg = (const io_write_arg_t *)data;
-    rb_funcall(arg->io, id_write, 1, rb_str_new(arg->data, arg->size));
-    return Qnil;
+    rbflite_voice_t *voice = DATA_PTR(self);
+    voice_speech_data_t vsd;
+    thread_queue_entry_t entry;
+    if (voice->voice == NULL) {
+        rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
+    }
+    vsd.voice = voice->voice;
+    vsd.text = StringValueCStr(text);
+    vsd.outtype = "play";
+    vsd.buffer_list = NULL;
+    vsd.buffer_list_last = NULL;
+    vsd.error = RBFLITE_ERROR_SUCCESS;
+    lock_thread(&voice->queue, &entry);
+    rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
+    RB_GC_GUARD(text);
+    unlock_thread(&voice->queue);
+    check_error(&vsd);
+    if (sleep_time_after_speaking.tv_sec != 0 || sleep_time_after_speaking.tv_usec != 0) {
+        rb_thread_wait_for(sleep_time_after_speaking);
+    }
+    return self;
 }
+/*
+ * @overload to_speech(text, audio_type = :wav, opts = {})
+ *
+ *  Converts <code>text</code> to audio data.
+ *
+ *  @example
+ *    voice = Flite::Voice.new
+ *
+ *    # Save speech as wav
+ *    File.binwrite('hello_flite_world.wav',
+ *                  voice.to_speech('Hello Flite World!'))
+ *
+ *    # Save speech as raw pcm (signed 16 bit little endian, rate 8000 Hz, mono)
+ *    File.binwrite('hello_flite_world.raw',
+ *                  voice.to_speech('Hello Flite World!', :raw))
+ *
+ *    # Save speech as mp3
+ *    File.binwrite('hello_flite_world.mp3',
+ *                  voice.to_speech('Hello Flite World!', :mp3))
+ *
+ *    # Save speech as mp3 whose bitrate is 128k.
+ *    File.binwrite('hello_flite_world.mp3',
+ *                  voice.to_speech('Hello Flite World!', :mp3, :bitrate => 128))
+ *
+ *  @param [String] text
+ *  @param [Symbol] audo_type :wav, :raw or :mp3 (when mp3 support is enabled)
+ *  @param [Hash]   opts  audio encoder options
+ *  @return [String] audio data
+ *  @see Flite.supported_audio_types
+ */
 static VALUE
-rbflite_voice_speech(int argc, VALUE *argv, VALUE self)
+rbflite_voice_to_speech(int argc, VALUE *argv, VALUE self)
 {
     rbflite_voice_t *voice = DATA_PTR(self);
     VALUE text;
-    VALUE out;
+    VALUE audio_type;
+    VALUE opts;
     cst_audio_streaming_info *asi = NULL;
-    voice_speech_arg_t arg;
+    audio_stream_encoder_t *encoder;
+    voice_speech_data_t vsd;
+    thread_queue_entry_t entry;
+    buffer_list_t *list, *list_next;
+    size_t size;
+    VALUE speech_data;
+    char *ptr;
     if (voice->voice == NULL) {
-        rb_raise(rb_eRuntimeError, "not initialized");
-    }
-    rb_scan_args(argc, argv, "11", &text, &out);
-    arg.voice = voice->voice;
-    arg.text = StringValueCStr(text);
-    arg.io = Qnil;
-    arg.state = 0;
-    if (NIL_P(out)) {
-        /* play audio */
-        arg.outtype = "play";
-    } else if (rb_respond_to(out, id_write)) {
-        /* write to an object */
-        asi = new_audio_streaming_info();
-        if (asi == NULL) {
-            rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
-        }
-        asi->asc = rbflite_audio_write_cb;
-        asi->userdata = (void*)&arg;
-        feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
-        arg.outtype = "stream";
-        arg.io = out;
+        rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
+    }
+    rb_scan_args(argc, argv, "12", &text, &audio_type, &opts);
+    if (NIL_P(audio_type)) {
+        encoder = &wav_encoder;
     } else {
-        /* write to a file */
-        out = rb_str_export_to_enc(out, rb_filesystem_encoding());
-        arg.outtype = StringValueCStr(out);
+        if (rb_equal(audio_type, sym_wav)) {
+            encoder = &wav_encoder;
+        } else if (rb_equal(audio_type, sym_raw)) {
+            encoder = &raw_encoder;
+#ifdef HAVE_MP3LAME
+        } else if (rb_equal(audio_type, sym_mp3)) {
+            encoder = &mp3_encoder;
+#endif
+        } else {
+            rb_raise(rb_eArgError, "unknown audio type");
+        }
     }
-    rb_thread_call_without_gvl(voice_speech_without_gvl, &arg, NULL, NULL);
-    RB_GC_GUARD(text);
-    RB_GC_GUARD(out);
+    vsd.voice = voice->voice;
+    vsd.text = StringValueCStr(text);
+    vsd.outtype = "stream";
+    vsd.encoder = NULL;
+    vsd.buffer_list = NULL;
+    vsd.buffer_list_last = NULL;
+    vsd.error = RBFLITE_ERROR_SUCCESS;
-    if (asi != NULL) {
-        flite_feat_remove(voice->voice->features, "streaming_info");
-        if (arg.state != 0) {
-            rb_jump_tag(arg.state);
+    if (encoder->encoder_init) {
+        vsd.encoder = encoder->encoder_init(opts);
+    }
+    /* write to an object */
+    asi = new_audio_streaming_info();
+    if (asi == NULL) {
+        if (encoder->encoder_fini) {
+            encoder->encoder_fini(vsd.encoder);
         }
+        rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
     }
-    return self;
+    asi->asc = encoder->asc;
+    asi->userdata = (void*)&vsd;
+    lock_thread(&voice->queue, &entry);
+    flite_feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
+    rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
+    flite_feat_remove(voice->voice->features, "streaming_info");
+    RB_GC_GUARD(text);
+    unlock_thread(&voice->queue);
+    if (encoder->encoder_fini) {
+        encoder->encoder_fini(vsd.encoder);
+    }
+    check_error(&vsd);
+    size = 0;
+    for (list = vsd.buffer_list; list != NULL; list = list->next) {
+        size += list->used;
+    }
+    speech_data = rb_str_buf_new(size);
+    ptr = RSTRING_PTR(speech_data);
+    for (list = vsd.buffer_list; list != NULL; list = list_next) {
+        memcpy(ptr, list->buf, list->used);
+        ptr += list->used;
+        list_next = list->next;
+        xfree(list);
+    }
+    rb_str_set_len(speech_data, size);
+    return speech_data;
 }
+/*
+ * @overload name
+ *
+ *  Returns voice name.
+ *
+ *  @example
+ *    voice = Flite::Voice.new('slt')
+ *    voice.name => 'slt'
+ *
+ *    # voice loading is a new feature of CMU Flite 2.0.0.
+ *    voice = Flite::Voice.new('/path/to/cmu_us_fem.flitevox')
+ *    voice.name => 'cmu_us_fem'
+ *
+ *  @return [String]
+ */
 static VALUE
 rbflite_voice_name(VALUE self)
 {
     rbflite_voice_t *voice = DATA_PTR(self);
     if (voice->voice == NULL) {
-        rb_raise(rb_eRuntimeError, "not initialized");
+        rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
     }
     return rb_usascii_str_new_cstr(voice->voice->name);
 }
+/*
+ * @overload pathname
+ *
+ *  Returns the path of the voice if the voice is a loadable voice.
+ *  Otherwise, nil.
+ *
+ *  @example
+ *    voice = Flite::Voice.new
+ *    voice.pathname => 'kal'
+ *
+ *    # voice loading is a new feature of CMU Flite 2.0.0.
+ *    voice = Flite::Voice.new('/path/to/cmu_us_aup.flitevox')
+ *    voice.pathname => '/path/to/cmu_us_aup.flitevox'
+ *
+ *  @return [String]
+ */
 static VALUE
 rbflite_voice_pathname(VALUE self)
 {
@@ -331,23 +811,60 @@ rbflite_voice_pathname(VALUE self)
     const char *pathname;
     if (voice->voice == NULL) {
-        rb_raise(rb_eRuntimeError, "not initialized");
+        rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
     }
-    pathname = get_param_string(voice->voice->features, "pathname", "");
+    pathname = flite_get_param_string(voice->voice->features, "pathname", "");
     if (pathname[0] == '\0') {
         return Qnil;
     }
     return rb_usascii_str_new_cstr(pathname);
 }
+/*
+ * @overload inspect
+ *
+ *  Returns the value as a string for inspection.
+ *
+ *  @return [String]
+ *  @private
+ */
+static VALUE
+rbflite_voice_inspect(VALUE self)
+{
+    rbflite_voice_t *voice = DATA_PTR(self);
+    const char *class_name = rb_obj_classname(self);
+    const char *voice_name;
+    const char *pathname;
+    if (voice->voice == NULL) {
+        return rb_sprintf("#<%s: not initialized>", class_name);
+    }
+    voice_name = voice->voice->name;
+    pathname = flite_get_param_string(voice->voice->features, "pathname", "");
+    if (pathname[0] == '\0') {
+        return rb_sprintf("#<%s: %s>", class_name, voice_name);
+    } else {
+        return rb_sprintf("#<%s: %s (%s)>", class_name, voice_name, pathname);
+    }
+}
+#ifdef _WIN32
+__declspec(dllexport) void Init_flite(void);
+#endif
 void
 Init_flite(void)
 {
     VALUE cmu_flite_version;
-    id_write = rb_intern("write");
+    sym_mp3 = ID2SYM(rb_intern("mp3"));
+    sym_raw = ID2SYM(rb_intern("raw"));
+    sym_wav = ID2SYM(rb_intern("wav"));
     rb_mFlite = rb_define_module("Flite");
+    rb_eFliteError = rb_define_class_under(rb_mFlite, "Error", rb_eStandardError);
+    rb_eFliteRuntimeError = rb_define_class_under(rb_mFlite, "Runtime", rb_eFliteError);
     cmu_flite_version = rb_usascii_str_new_cstr(FLITE_PROJECT_VERSION);
     OBJ_FREEZE(cmu_flite_version);
@@ -367,11 +884,15 @@ Init_flite(void)
 #endif
     rb_define_singleton_method(rb_mFlite, "list_builtin_voices", flite_s_list_builtin_voices, 0);
+    rb_define_singleton_method(rb_mFlite, "supported_audio_types", flite_s_supported_audio_types, 0);
+    rb_define_singleton_method(rb_mFlite, "sleep_time_after_speaking=", flite_s_set_sleep_time_after_speaking, 1);
     rb_cVoice = rb_define_class_under(rb_mFlite, "Voice", rb_cObject);
     rb_define_alloc_func(rb_cVoice, rbflite_voice_s_allocate);
     rb_define_method(rb_cVoice, "initialize", rbflite_voice_initialize, -1);
-    rb_define_method(rb_cVoice, "speech", rbflite_voice_speech, -1);
+    rb_define_method(rb_cVoice, "speak", rbflite_voice_speak, 1);
+    rb_define_method(rb_cVoice, "to_speech", rbflite_voice_to_speech, -1);
     rb_define_method(rb_cVoice, "name", rbflite_voice_name, 0);
     rb_define_method(rb_cVoice, "pathname", rbflite_voice_pathname, 0);
+    rb_define_method(rb_cVoice, "inspect", rbflite_voice_inspect, 0);
 }