flite 0.0.3.1-x86-mingw32 → 0.1.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,6 +38,13 @@
38
38
  #include "rbflite.h"
39
39
  #include <flite/flite_version.h>
40
40
 
41
+ #ifndef MIN /* fallback definition, used only when no included header already provides MIN */
42
+ #define MIN(a, b) (((a) < (b)) ? (a) : (b)) /* whole expansion is parenthesized so it composes inside larger expressions; each argument may be evaluated twice */
43
+ #endif
44
+ #ifndef MAX /* fallback definition, used only when no included header already provides MAX */
45
+ #define MAX(a, b) (((a) > (b)) ? (a) : (b)) /* whole expansion is parenthesized so it composes inside larger expressions; each argument may be evaluated twice */
46
+ #endif
47
+
41
48
  #ifdef WORDS_BIGENDIAN
42
49
  #define TO_LE4(num) SWAPINT(num)
43
50
  #defien TO_LE2(num) SWAPSHORT(num)
@@ -46,6 +53,16 @@
46
53
  #define TO_LE2(num) (num)
47
54
  #endif
48
55
 
56
+ #ifdef HAVE_LAME_LAME_H
57
+ #include <lame/lame.h>
58
+ #define HAVE_MP3LAME 1
59
+ #endif
60
+
61
+ #ifdef HAVE_LAME_H
62
+ #include <lame.h>
63
+ #define HAVE_MP3LAME 1
64
+ #endif
65
+
49
66
  #ifdef HAVE_CST_AUDIO_STREAMING_INFO_UTT
50
67
  /* flite 2.0.0 */
51
68
  typedef struct cst_audio_streaming_info_struct *asc_last_arg_t;
@@ -56,6 +73,14 @@ typedef void *asc_last_arg_t;
56
73
  #define ASC_LAST_ARG_TO_USERDATA(last_arg) (last_arg)
57
74
  #endif
58
75
 
76
+ enum rbfile_error { /* status stored in voice_speech_data_t.error and turned into a Ruby exception by check_error() */
77
+ RBFLITE_ERROR_SUCCESS, /* no error; check_error() returns without raising */
78
+ RBFLITE_ERROR_OUT_OF_MEMORY, /* set by add_data() when buffer_list_alloc() returns NULL */
79
+ RBFLITE_ERROR_LAME_INIT_PARAMS, /* set by mp3_encoder_cb() when lame_init_params() returns -1 */
80
+ RBFLITE_ERROR_LAME_ENCODE_BUFFER, /* set by mp3_encoder_cb() when lame_encode_buffer() returns a negative value */
81
+ RBFLITE_ERROR_LAME_ENCODE_FLUSH, /* set by mp3_encoder_cb() when lame_encode_flush() returns a negative value */
82
+ };
83
+
59
84
  void usenglish_init(cst_voice *v);
60
85
  cst_lexicon *cmulex_init(void);
61
86
 
@@ -65,39 +90,166 @@ cst_lexicon *cmu_indic_lex_init(void);
65
90
  void cmu_grapheme_lang_init(cst_voice *v);
66
91
  cst_lexicon *cmu_grapheme_lex_init(void);
67
92
 
93
+ typedef struct thread_queue_entry { /* one waiter in a voice's thread queue; callers pass a pointer to a local variable */
94
+ struct thread_queue_entry *next; /* following waiter, or NULL for the last entry */
95
+ VALUE thread; /* Ruby thread to wake up; lock_thread() assigns it only when the entry has to wait */
96
+ } thread_queue_entry_t;
97
+
98
+ typedef struct { /* singly linked FIFO of threads using one voice; the head entry is the thread currently running */
99
+ thread_queue_entry_t *head; /* first entry, or NULL when the queue is empty */
100
+ thread_queue_entry_t **tail; /* address of the link to fill next: &head when empty, otherwise &last->next */
101
+ } thread_queue_t;
102
+
68
103
  typedef struct { /* C data wrapped by a Flite::Voice object */
69
104
  cst_voice *voice; /* methods raise "is not initialized" while this is NULL */
105
+ thread_queue_t queue; /* threads queued by lock_thread() around speak/to_speech on this voice */
70
106
  } rbflite_voice_t;
71
107
 
108
+ #define MIN_BUFFER_LIST_SIZE (64 * 1024) /* smallest allocation, header included, that buffer_list_alloc() requests */
109
+ typedef struct buffer_list { /* one chunk of collected audio bytes in a singly linked list */
110
+ struct buffer_list *next; /* following chunk, or NULL for the last one */
111
+ size_t size; /* capacity of buf in bytes (allocation size minus the header) */
112
+ size_t used; /* number of bytes of buf already filled */
113
+ char buf[1]; /* start of the payload; the allocation extends past the struct by the chunk capacity */
114
+ } buffer_list_t;
115
+
72
116
  typedef struct { /* per-call state shared between a Voice method and the code run by rb_thread_call_without_gvl() */
73
117
  cst_voice *voice; /* voice passed to flite_text_to_speech() */
74
118
  const char *text; /* text to synthesize, taken from the Ruby string with StringValueCStr() */
75
119
  const char *outtype; /* output type passed to flite_text_to_speech(): "play" in speak, "stream" in to_speech */
76
- VALUE io;
77
- int state;
78
- } voice_speech_arg_t;
120
+ void *encoder; /* value returned by the encoder's encoder_init, or NULL; rbflite_voice_speak() leaves it unset */
121
+ buffer_list_t *buffer_list; /* first chunk of collected audio data, or NULL when nothing has been written */
122
+ buffer_list_t *buffer_list_last; /* chunk that add_data() appends to */
123
+ enum rbfile_error error; /* RBFLITE_ERROR_SUCCESS unless a callback recorded a failure */
124
+ } voice_speech_data_t;
79
125
 
80
126
  typedef struct { /* describes one output format selectable in to_speech */
81
- VALUE io;
82
- void *data;
83
- long size;
84
- } io_write_arg_t;
127
+ cst_audio_stream_callback asc; /* callback installed as the flite audio streaming callback */
128
+ void *(*encoder_init)(VALUE opts); /* optional; creates the encoder state from the opts argument, NULL when not needed */
129
+ void (*encoder_fini)(void *encoder); /* optional; releases what encoder_init returned, NULL when not needed */
130
+ } audio_stream_encoder_t;
85
131
 
86
132
  static VALUE rb_mFlite;
133
+ static VALUE rb_eFliteError;
134
+ static VALUE rb_eFliteRuntimeError;
87
135
  static VALUE rb_cVoice;
88
- static ID id_write;
136
+ static VALUE sym_mp3;
137
+ static VALUE sym_raw;
138
+ static VALUE sym_wav;
139
+ static struct timeval sleep_time_after_speaking;
140
+
141
+ static buffer_list_t *buffer_list_alloc(size_t size);
142
+ static void check_error(voice_speech_data_t *vsd);
143
+
144
+ static void lock_thread(thread_queue_t *queue, thread_queue_entry_t *entry) /* Appends entry to queue and stops the current Ruby thread unless entry became the head. */
145
+ {
146
+ /* enqueue the current thread to voice->queue. */
147
+ entry->next = NULL;
148
+ *queue->tail = entry; /* tail addresses either queue->head (empty queue) or the last entry's next link */
149
+ queue->tail = &entry->next;
150
+ if (queue->head != entry) { /* head == entry only when the queue was empty before this call */
151
+ /* stop the current thread if other threads run. */
152
+ entry->thread = rb_thread_current();
153
+ rb_thread_stop(); /* NOTE(review): entry stays linked if this returns for a reason other than unlock_thread()'s wakeup - confirm */
154
+ }
155
+ }
156
+
157
+ static void unlock_thread(thread_queue_t *queue) /* Removes the head entry of queue and wakes the thread of the entry that becomes the new head, if any. */
158
+ {
159
+ /* dequeue the current thread from voice->queue. */
160
+ queue->head = queue->head->next;
161
+ if (queue->head == NULL) {
162
+ queue->tail = &queue->head; /* queue is empty again: the next lock_thread() call must store into head */
163
+ } else {
164
+ /* resume the top of blocked threads. */
165
+ rb_thread_wakeup_alive(queue->head->thread);
166
+ }
167
+ }
168
+
169
+ static int add_data(voice_speech_data_t *vsd, const void *data, size_t size) /* Appends size bytes from data to vsd's chunk list; returns 0 on success, or -1 after setting vsd->error to RBFLITE_ERROR_OUT_OF_MEMORY. */
170
+ {
171
+ buffer_list_t *list;
172
+ size_t rest;
173
+
174
+ if (vsd->buffer_list == NULL) { /* first write: create the initial chunk */
175
+ list = buffer_list_alloc(size);
176
+ if (list == NULL) {
177
+ vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
178
+ return -1;
179
+ }
180
+ vsd->buffer_list = vsd->buffer_list_last = list;
181
+ }
182
+ list = vsd->buffer_list_last;
183
+ rest = list->size - list->used; /* free bytes left in the last chunk */
184
+ if (size <= rest) { /* everything fits into the last chunk */
185
+ memcpy(list->buf + list->used, data, size);
186
+ list->used += size;
187
+ } else { /* fill the last chunk completely, then put the remainder into a new chunk */
188
+ memcpy(list->buf + list->used, data, rest);
189
+ list->used += rest;
190
+ data = (const char*)data + rest;
191
+ size -= rest;
192
+ list = buffer_list_alloc(size); /* buffer_list_alloc() sizes the chunk to hold at least the remaining bytes */
193
+ if (list == NULL) {
194
+ vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
195
+ return -1;
196
+ }
197
+ memcpy(list->buf, data, size);
198
+ list->used = size;
199
+ vsd->buffer_list_last->next = list;
200
+ vsd->buffer_list_last = list;
201
+ }
202
+ return 0;
203
+ }
204
+
205
+ static buffer_list_t *buffer_list_alloc(size_t size) /* Allocates an empty chunk able to hold at least size bytes; returns NULL when xmalloc() returns NULL. */
206
+ {
207
+ size_t alloc_size = MAX(size + offsetof(buffer_list_t, buf), MIN_BUFFER_LIST_SIZE); /* header plus payload, but never less than MIN_BUFFER_LIST_SIZE */
208
+ buffer_list_t *list = xmalloc(alloc_size); /* NOTE(review): reached from stream callbacks that run under rb_thread_call_without_gvl() - confirm xmalloc is safe to call there */
209
+
210
+ if (list == NULL) { /* NOTE(review): Ruby's xmalloc is documented to raise instead of returning NULL, so this branch may be unreachable - confirm */
211
+ return NULL;
212
+ }
213
+ list->next = NULL;
214
+ list->size = alloc_size - offsetof(buffer_list_t, buf); /* payload capacity excludes the header */
215
+ list->used = 0;
216
+ return list;
217
+ }
218
+
219
+ static void check_error(voice_speech_data_t *vsd)
220
+ {
221
+ buffer_list_t *list, *list_next;
222
+
223
+ if (vsd->error == RBFLITE_ERROR_SUCCESS) {
224
+ return;
225
+ }
226
+ for (list = vsd->buffer_list; list != NULL; list = list_next) {
227
+ list_next = list->next;
228
+ xfree(list);
229
+ }
230
+ vsd->buffer_list = NULL;
231
+ switch (vsd->error) {
232
+ case RBFLITE_ERROR_OUT_OF_MEMORY:
233
+ rb_raise(rb_eNoMemError, "out of memory while writing speech data");
234
+ case RBFLITE_ERROR_LAME_INIT_PARAMS:
235
+ rb_raise(rb_eFliteRuntimeError, "lame_init_params() error");
236
+ case RBFLITE_ERROR_LAME_ENCODE_BUFFER:
237
+ rb_raise(rb_eFliteRuntimeError, "lame_encode_buffer() error");
238
+ case RBFLITE_ERROR_LAME_ENCODE_FLUSH:
239
+ rb_raise(rb_eFliteRuntimeError, "lame_encode_flush() error");
240
+ default:
241
+ rb_raise(rb_eFliteRuntimeError, "Unkown error %d", vsd->error);
242
+ }
243
+ }
89
244
 
90
245
  /*
91
- * call graph:
246
+ * Returns builtin voice names.
92
247
  *
93
- * rbflite_audio_write_cb()
94
- * --> rbfile_io_write_protect() via rb_thread_call_with_gvl()
95
- * --> rbfile_io_write() via rb_protect()
248
+ * @example
249
+ * Flite.list_builtin_voices # => ["kal", "awb_time", "kal16", "awb", "rms", "slt"]
250
+ *
251
+ * @return [Array]
96
252
  */
97
- static int rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg);
98
- static void *rbfile_io_write_protect(void *data);
99
- static VALUE rbfile_io_write(VALUE data);
100
-
101
253
  static VALUE
102
254
  flite_s_list_builtin_voices(VALUE klass)
103
255
  {
@@ -112,6 +264,53 @@ flite_s_list_builtin_voices(VALUE klass)
112
264
  return ary;
113
265
  }
114
266
 
267
+ /*
268
+ * Returns supported audio types used as the second argument of {Flite::Voice#to_speech}.
269
+ *
270
+ * @example
271
+ * # Compiled with mp3 support
272
+ * Flite.supported_audio_types # => [:wav, :raw, :mp3]
273
+ *
274
+ * # Compiled without mp3 support
275
+ * Flite.supported_audio_types # => [:wav, :raw]
276
+ *
277
+ * @return [Array]
278
+ */
279
+ static VALUE
280
+ flite_s_supported_audio_types(VALUE klass) /* Flite.supported_audio_types: builds a new Array of audio type symbols on each call */
281
+ {
282
+ VALUE ary = rb_ary_new();
283
+
284
+ rb_ary_push(ary, sym_wav);
285
+ rb_ary_push(ary, sym_raw);
286
+ #ifdef HAVE_MP3LAME /* defined earlier in this file when a lame header was found */
287
+ rb_ary_push(ary, sym_mp3);
288
+ #endif
289
+ return ary;
290
+ }
291
+
292
+ /*
293
+ * @overload sleep_time_after_speaking=(sec)
294
+ *
295
+ * Sets sleep time after {Flite::Voice#speak}.
296
+ * The default value is 0 on Unix and 0.3 on Windows.
297
+ *
298
+ * This is a workaround for voice cutoff on Windows.
299
+ * The following code speaks "Hello Wor.. Hello World" without
300
+ * 0.3 seconds sleep.
301
+ *
302
+ * "Hello World".speak # The last 0.3 seconds are cut off by the next speech on Windows.
303
+ * "Hello World".speak
304
+ *
305
+ * @param [Float] sec seconds to sleep
306
+ */
307
+ static VALUE
308
+ flite_s_set_sleep_time_after_speaking(VALUE klass, VALUE val) /* Flite.sleep_time_after_speaking=: stores the interval that rbflite_voice_speak() waits after speaking */
309
+ {
310
+ sleep_time_after_speaking = rb_time_interval(val); /* converts val to the struct timeval kept in the file-scope variable */
311
+ return val;
312
+ }
313
+
115
314
  static void
116
315
  rbfile_voice_free(rbflite_voice_t *voice)
117
316
  {
@@ -125,8 +324,10 @@ static VALUE
125
324
  rbflite_voice_s_allocate(VALUE klass)
126
325
  {
127
326
  rbflite_voice_t *voice;
327
+ VALUE obj = Data_Make_Struct(klass, rbflite_voice_t, NULL, rbfile_voice_free, voice);
128
328
 
129
- return Data_Make_Struct(klass, rbflite_voice_t, NULL, rbfile_voice_free, voice);
329
+ voice->queue.tail = &voice->queue.head;
330
+ return obj;
130
331
  }
131
332
 
132
333
  #ifdef HAVE_FLITE_VOICE_LOAD
@@ -137,6 +338,28 @@ rbflite_voice_load(void *data)
137
338
  }
138
339
  #endif
139
340
 
341
+ /*
342
+ * @overload initialize(name = nil)
343
+ *
344
+ * Create a new voice specified by <code>name</code>.
345
+ * If <code>name</code> includes '.' or '/' and ruby flite
346
+ * is compiled for CMU Flite 2.0.0 or later, try to
347
+ * use a loadable voice.
348
+ *
349
+ * @example
350
+ *
351
+ * # Use default voice. It is 'kal' usually.
352
+ * voice = Flite::Voice.new
353
+ *
354
+ * # Use a builtin voice.
355
+ * voice = Flite::Voice.new('awb')
356
+ *
357
+ * # Use a loadable voice.
358
+ * voice = Flite::Voice.new('/path/to/cmu_us_gka.flitevox')
359
+ *
360
+ * @param [String] name
361
+ * @see Flite.list_builtin_voices
362
+ */
140
363
  static VALUE
141
364
  rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
142
365
  {
@@ -174,18 +397,15 @@ rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
174
397
  static void * /* passed to rb_thread_call_without_gvl() by the speak and to_speech methods */
175
398
  voice_speech_without_gvl(void *data)
176
399
  {
177
- voice_speech_arg_t *arg = (voice_speech_arg_t *)data;
178
- flite_text_to_speech(arg->text, arg->voice, arg->outtype);
400
+ voice_speech_data_t *vsd = (voice_speech_data_t *)data; /* data is the caller's voice_speech_data_t */
401
+ flite_text_to_speech(vsd->text, vsd->voice, vsd->outtype); /* return value is ignored; stream callbacks report failures through vsd->error */
179
402
  return NULL;
180
403
  }
181
404
 
182
405
  static int
183
- rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
406
+ wav_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
184
407
  {
185
- voice_speech_arg_t *ud = (voice_speech_arg_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
186
- io_write_arg_t arg;
187
-
188
- arg.io = ud->io;
408
+ voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
189
409
 
190
410
  if (start == 0) {
191
411
  /* write WAVE file header. */
@@ -227,103 +447,363 @@ rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_las
227
447
  header.bitswidth = TO_LE2(sizeof(short) * 8);
228
448
  header.data_size = TO_LE4(data_size);
229
449
 
230
- arg.data = &header;
231
- arg.size = sizeof(header);
232
- ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
233
- if (ud->state != 0) {
450
+ if (add_data(vsd, &header, sizeof(header)) != 0) {
234
451
  return CST_AUDIO_STREAM_STOP;
235
452
  }
236
453
  }
237
454
 
238
- arg.data = &w->samples[start];
239
- arg.size = size * sizeof(short);
240
- ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
241
- if (ud->state != 0) {
455
+ if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) {
242
456
  return CST_AUDIO_STREAM_STOP;
243
457
  }
458
+ return CST_AUDIO_STREAM_CONT;
459
+ }
460
+
461
+ static audio_stream_encoder_t wav_encoder = { /* encoder used by to_speech for :wav and when no audio type is given */
462
+ wav_encoder_cb,
463
+ NULL, /* no encoder_init: this format keeps no encoder state */
464
+ NULL, /* no encoder_fini */
465
+ };
244
466
 
467
+ static int /* CST_AUDIO_STREAM_CONT to continue, CST_AUDIO_STREAM_STOP when add_data() fails */
468
+ raw_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg) /* stream callback for :raw: copies size samples starting at index start, with no header */
469
+ {
470
+ voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
471
+
472
+ if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) { /* samples are stored as short, so the byte count is size * sizeof(short) */
473
+ return CST_AUDIO_STREAM_STOP;
474
+ }
245
475
  return CST_AUDIO_STREAM_CONT;
246
476
  }
247
477
 
248
- static void *
249
- rbfile_io_write_protect(void *data)
478
+ static audio_stream_encoder_t raw_encoder = { /* encoder used by to_speech for :raw */
479
+ raw_encoder_cb,
480
+ NULL, /* no encoder_init: this format keeps no encoder state */
481
+ NULL, /* no encoder_fini */
482
+ };
483
+
484
+ #ifdef HAVE_MP3LAME
485
+
486
+ #define MAX_SAMPLE_SIZE 1024 /* largest number of samples handed to lame_encode_buffer() in one call */
487
+ /* "mp3buf_size in bytes = 1.25*num_samples + 7200" according to lame.h. */
488
+ #define MP3BUF_SIZE (MAX_SAMPLE_SIZE + MAX_SAMPLE_SIZE / 4 + 7200)
489
+ static int mp3_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg) /* stream callback for :mp3: encodes the samples with LAME and appends the output via add_data(); returns CST_AUDIO_STREAM_STOP on any failure */
490
+ {
491
+ voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
492
+ lame_global_flags *gf = vsd->encoder; /* created by mp3_encoder_init() */
493
+ unsigned char mp3buf[MP3BUF_SIZE];
494
+ short *sptr = &w->samples[start]; /* next sample to encode */
495
+ short *eptr = sptr + size; /* one past the last sample of this callback */
496
+ int rv;
497
+
498
+ if (start == 0) { /* first callback: finish the LAME setup with the wave's properties */
499
+ lame_set_num_samples(gf, cst_wave_num_samples(w));
500
+ lame_set_in_samplerate(gf, cst_wave_sample_rate(w));
501
+ lame_set_num_channels(gf, 1);
502
+ lame_set_mode(gf, MONO);
503
+ rv = lame_init_params(gf);
504
+ if (rv == -1) {
505
+ vsd->error = RBFLITE_ERROR_LAME_INIT_PARAMS;
506
+ return CST_AUDIO_STREAM_STOP;
507
+ }
508
+ }
509
+ while (eptr - sptr > MAX_SAMPLE_SIZE) { /* encode full MAX_SAMPLE_SIZE slices so the output always fits in mp3buf */
510
+ rv = lame_encode_buffer(gf, sptr, NULL, MAX_SAMPLE_SIZE, mp3buf, sizeof(mp3buf));
511
+ if (rv < 0) {
512
+ vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
513
+ return CST_AUDIO_STREAM_STOP;
514
+ }
515
+ if (rv > 0) { /* rv is the number of bytes written to mp3buf */
516
+ if (add_data(vsd, mp3buf, rv) != 0) {
517
+ return CST_AUDIO_STREAM_STOP;
518
+ }
519
+ }
520
+ sptr += MAX_SAMPLE_SIZE;
521
+ }
522
+ rv = lame_encode_buffer(gf, sptr, NULL, eptr - sptr, mp3buf, sizeof(mp3buf)); /* remaining samples, at most MAX_SAMPLE_SIZE */
523
+ if (rv < 0) {
524
+ vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
525
+ return CST_AUDIO_STREAM_STOP;
526
+ }
527
+ if (rv > 0) {
528
+ if (add_data(vsd, mp3buf, rv) != 0) {
529
+ return CST_AUDIO_STREAM_STOP;
530
+ }
531
+ }
532
+ if (last) { /* final callback: flush the data LAME still holds */
533
+ rv = lame_encode_flush(gf, mp3buf, sizeof(mp3buf));
534
+ if (rv < 0) {
535
+ vsd->error = RBFLITE_ERROR_LAME_ENCODE_FLUSH;
536
+ return CST_AUDIO_STREAM_STOP;
537
+ }
538
+ if (rv > 0) {
539
+ if (add_data(vsd, mp3buf, rv) != 0) {
540
+ return CST_AUDIO_STREAM_STOP;
541
+ }
542
+ }
543
+ }
544
+ return CST_AUDIO_STREAM_CONT;
545
+ }
546
+
547
+ static void *mp3_encoder_init(VALUE opts) /* Creates a LAME encoder configured from opts (nil, or a Hash with :bitrate, :scale, :quality); raises Flite::Runtime when lame_init() fails. */
250
548
  {
251
- int state = 0;
252
- rb_protect(rbfile_io_write, (VALUE)data, &state);
253
- return (void*)(VALUE)state;
549
+ lame_global_flags *gf = lame_init();
550
+
551
+ if (gf == NULL) {
552
+ rb_raise(rb_eFliteRuntimeError, "Failed to initialize lame");
553
+ }
554
+
555
+ lame_set_bWriteVbrTag(gf, 0);
556
+ lame_set_brate(gf, 64); /* default bitrate, overridden by opts[:bitrate] */
557
+
558
+ if (!NIL_P(opts)) {
559
+ VALUE v;
560
+ Check_Type(opts, T_HASH); /* NOTE(review): gf is not closed if this or a conversion below raises - confirm whether that leak matters */
561
+
562
+ v = rb_hash_aref(opts, ID2SYM(rb_intern("bitrate")));
563
+ if (!NIL_P(v)) {
564
+ lame_set_brate(gf, NUM2INT(v));
565
+ }
566
+
567
+ v = rb_hash_aref(opts, ID2SYM(rb_intern("scale")));
568
+ if (!NIL_P(v)) {
569
+ lame_set_scale(gf, (float)NUM2DBL(v)); /* lame_set_scale() takes a float; an int conversion would truncate fractional scales such as 0.5 to 0 */
570
+ }
571
+
572
+ v = rb_hash_aref(opts, ID2SYM(rb_intern("quality")));
573
+ if (!NIL_P(v)) {
574
+ lame_set_quality(gf, NUM2INT(v));
575
+ }
576
+ }
577
+
578
+ lame_set_bWriteVbrTag(gf, 0);
579
+ return gf; /* released by mp3_encoder_fini() */
580
+ }
581
+
582
+ static void mp3_encoder_fini(void *encoder) /* Releases the encoder created by mp3_encoder_init(). */
583
+ {
584
+ lame_close(encoder);
254
585
  }
255
586
 
587
+ static audio_stream_encoder_t mp3_encoder = { /* encoder used by to_speech for :mp3; compiled only when HAVE_MP3LAME is defined */
588
+ mp3_encoder_cb,
589
+ mp3_encoder_init,
590
+ mp3_encoder_fini,
591
+ };
592
+
593
+ #endif
594
+
595
+ /*
596
+ * @overload speak(text)
597
+ *
598
+ * Speak the <code>text</code>.
599
+ *
600
+ * @example
601
+ * voice = Flite::Voice.new
602
+ *
603
+ * # Speak 'Hello Flite World!'
604
+ * voice.speak('Hello Flite World!')
605
+ *
606
+ * @param [String] text
607
+ */
256
608
  static VALUE
257
- rbfile_io_write(VALUE data)
609
+ rbflite_voice_speak(VALUE self, VALUE text) /* Flite::Voice#speak: synthesizes text with outtype "play" and returns self */
258
610
  {
259
- const io_write_arg_t *arg = (const io_write_arg_t *)data;
260
- rb_funcall(arg->io, id_write, 1, rb_str_new(arg->data, arg->size));
261
- return Qnil;
611
+ rbflite_voice_t *voice = DATA_PTR(self);
612
+ voice_speech_data_t vsd;
613
+ thread_queue_entry_t entry; /* queue node for this call; lives on this stack frame */
614
+
615
+ if (voice->voice == NULL) {
616
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
617
+ }
618
+
619
+ vsd.voice = voice->voice;
620
+ vsd.text = StringValueCStr(text);
621
+ vsd.outtype = "play"; /* vsd.encoder is left unset in this function */
622
+ vsd.buffer_list = NULL;
623
+ vsd.buffer_list_last = NULL;
624
+ vsd.error = RBFLITE_ERROR_SUCCESS;
625
+
626
+ lock_thread(&voice->queue, &entry); /* wait until no other thread is using this voice */
627
+
628
+ rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
629
+ RB_GC_GUARD(text); /* vsd.text points into text, so keep the string alive until synthesis is done */
630
+
631
+ unlock_thread(&voice->queue);
632
+
633
+ check_error(&vsd); /* raises if vsd.error was set */
634
+
635
+ if (sleep_time_after_speaking.tv_sec != 0 || sleep_time_after_speaking.tv_usec != 0) { /* skip the wait when the configured interval is zero */
636
+ rb_thread_wait_for(sleep_time_after_speaking);
637
+ }
638
+
639
+ return self;
262
640
  }
263
641
 
642
+ /*
643
+ * @overload to_speech(text, audio_type = :wav, opts = {})
644
+ *
645
+ * Converts <code>text</code> to audio data.
646
+ *
647
+ * @example
648
+ * voice = Flite::Voice.new
649
+ *
650
+ * # Save speech as wav
651
+ * File.binwrite('hello_flite_world.wav',
652
+ * voice.to_speech('Hello Flite World!'))
653
+ *
654
+ * # Save speech as raw pcm (signed 16 bit little endian, rate 8000 Hz, mono)
655
+ * File.binwrite('hello_flite_world.raw',
656
+ * voice.to_speech('Hello Flite World!', :raw))
657
+ *
658
+ * # Save speech as mp3
659
+ * File.binwrite('hello_flite_world.mp3',
660
+ * voice.to_speech('Hello Flite World!', :mp3))
661
+ *
662
+ * # Save speech as mp3 whose bitrate is 128k.
663
+ * File.binwrite('hello_flite_world.mp3',
664
+ * voice.to_speech('Hello Flite World!', :mp3, :bitrate => 128))
665
+ *
666
+ * @param [String] text
667
+ * @param [Symbol] audio_type :wav, :raw or :mp3 (when mp3 support is enabled)
668
+ * @param [Hash] opts audio encoder options
669
+ * @return [String] audio data
670
+ * @see Flite.supported_audio_types
671
+ */
264
672
  static VALUE
265
- rbflite_voice_speech(int argc, VALUE *argv, VALUE self)
673
+ rbflite_voice_to_speech(int argc, VALUE *argv, VALUE self)
266
674
  {
267
675
  rbflite_voice_t *voice = DATA_PTR(self);
268
676
  VALUE text;
269
- VALUE out;
677
+ VALUE audio_type;
678
+ VALUE opts;
270
679
  cst_audio_streaming_info *asi = NULL;
271
- voice_speech_arg_t arg;
680
+ audio_stream_encoder_t *encoder;
681
+ voice_speech_data_t vsd;
682
+ thread_queue_entry_t entry;
683
+ buffer_list_t *list, *list_next;
684
+ size_t size;
685
+ VALUE speech_data;
686
+ char *ptr;
272
687
 
273
688
  if (voice->voice == NULL) {
274
- rb_raise(rb_eRuntimeError, "not initialized");
275
- }
276
-
277
- rb_scan_args(argc, argv, "11", &text, &out);
278
- arg.voice = voice->voice;
279
- arg.text = StringValueCStr(text);
280
- arg.io = Qnil;
281
- arg.state = 0;
282
-
283
- if (NIL_P(out)) {
284
- /* play audio */
285
- arg.outtype = "play";
286
- } else if (rb_respond_to(out, id_write)) {
287
- /* write to an object */
288
- asi = new_audio_streaming_info();
289
- if (asi == NULL) {
290
- rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
291
- }
292
- asi->asc = rbflite_audio_write_cb;
293
- asi->userdata = (void*)&arg;
294
- feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
295
- arg.outtype = "stream";
296
- arg.io = out;
689
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
690
+ }
691
+
692
+ rb_scan_args(argc, argv, "12", &text, &audio_type, &opts);
693
+
694
+ if (NIL_P(audio_type)) {
695
+ encoder = &wav_encoder;
297
696
  } else {
298
- /* write to a file */
299
- out = rb_str_export_to_enc(out, rb_filesystem_encoding());
300
- arg.outtype = StringValueCStr(out);
697
+ if (rb_equal(audio_type, sym_wav)) {
698
+ encoder = &wav_encoder;
699
+ } else if (rb_equal(audio_type, sym_raw)) {
700
+ encoder = &raw_encoder;
701
+ #ifdef HAVE_MP3LAME
702
+ } else if (rb_equal(audio_type, sym_mp3)) {
703
+ encoder = &mp3_encoder;
704
+ #endif
705
+ } else {
706
+ rb_raise(rb_eArgError, "unknown audio type");
707
+ }
301
708
  }
302
709
 
303
- rb_thread_call_without_gvl(voice_speech_without_gvl, &arg, NULL, NULL);
304
- RB_GC_GUARD(text);
305
- RB_GC_GUARD(out);
710
+ vsd.voice = voice->voice;
711
+ vsd.text = StringValueCStr(text);
712
+ vsd.outtype = "stream";
713
+ vsd.encoder = NULL;
714
+ vsd.buffer_list = NULL;
715
+ vsd.buffer_list_last = NULL;
716
+ vsd.error = RBFLITE_ERROR_SUCCESS;
306
717
 
307
- if (asi != NULL) {
308
- flite_feat_remove(voice->voice->features, "streaming_info");
309
- if (arg.state != 0) {
310
- rb_jump_tag(arg.state);
718
+ if (encoder->encoder_init) {
719
+ vsd.encoder = encoder->encoder_init(opts);
720
+ }
721
+
722
+ /* write to an object */
723
+ asi = new_audio_streaming_info();
724
+ if (asi == NULL) {
725
+ if (encoder->encoder_fini) {
726
+ encoder->encoder_fini(vsd.encoder);
311
727
  }
728
+ rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
312
729
  }
313
- return self;
730
+ asi->asc = encoder->asc;
731
+ asi->userdata = (void*)&vsd;
732
+
733
+ lock_thread(&voice->queue, &entry);
734
+
735
+ flite_feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
736
+ rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
737
+ flite_feat_remove(voice->voice->features, "streaming_info");
738
+ RB_GC_GUARD(text);
739
+
740
+ unlock_thread(&voice->queue);
741
+
742
+ if (encoder->encoder_fini) {
743
+ encoder->encoder_fini(vsd.encoder);
744
+ }
745
+
746
+ check_error(&vsd);
747
+
748
+ size = 0;
749
+ for (list = vsd.buffer_list; list != NULL; list = list->next) {
750
+ size += list->used;
751
+ }
752
+ speech_data = rb_str_buf_new(size);
753
+ ptr = RSTRING_PTR(speech_data);
754
+ for (list = vsd.buffer_list; list != NULL; list = list_next) {
755
+ memcpy(ptr, list->buf, list->used);
756
+ ptr += list->used;
757
+ list_next = list->next;
758
+ xfree(list);
759
+ }
760
+ rb_str_set_len(speech_data, size);
761
+
762
+ return speech_data;
314
763
  }
315
764
 
765
+ /*
766
+ * @overload name
767
+ *
768
+ * Returns voice name.
769
+ *
770
+ * @example
771
+ * voice = Flite::Voice.new('slt')
772
+ * voice.name => 'slt'
773
+ *
774
+ * # voice loading is a new feature of CMU Flite 2.0.0.
775
+ * voice = Flite::Voice.new('/path/to/cmu_us_fem.flitevox')
776
+ * voice.name => 'cmu_us_fem'
777
+ *
778
+ * @return [String]
779
+ */
316
780
  static VALUE
317
781
  rbflite_voice_name(VALUE self) /* Flite::Voice#name: returns the voice's name as a new US-ASCII string */
318
782
  {
319
783
  rbflite_voice_t *voice = DATA_PTR(self);
320
784
 
321
785
  if (voice->voice == NULL) {
322
- rb_raise(rb_eRuntimeError, "not initialized");
786
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
323
787
  }
324
788
  return rb_usascii_str_new_cstr(voice->voice->name);
325
789
  }
326
790
 
791
+ /*
792
+ * @overload pathname
793
+ *
794
+ * Returns the path of the voice if the voice is a loadable voice.
795
+ * Otherwise, nil.
796
+ *
797
+ * @example
798
+ * voice = Flite::Voice.new
799
+ * voice.pathname => nil
800
+ *
801
+ * # voice loading is a new feature of CMU Flite 2.0.0.
802
+ * voice = Flite::Voice.new('/path/to/cmu_us_aup.flitevox')
803
+ * voice.pathname => '/path/to/cmu_us_aup.flitevox'
804
+ *
805
+ * @return [String, nil]
806
+ */
327
807
  static VALUE
328
808
  rbflite_voice_pathname(VALUE self)
329
809
  {
@@ -331,23 +811,60 @@ rbflite_voice_pathname(VALUE self)
331
811
  const char *pathname;
332
812
 
333
813
  if (voice->voice == NULL) {
334
- rb_raise(rb_eRuntimeError, "not initialized");
814
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
335
815
  }
336
- pathname = get_param_string(voice->voice->features, "pathname", "");
816
+ pathname = flite_get_param_string(voice->voice->features, "pathname", "");
337
817
  if (pathname[0] == '\0') {
338
818
  return Qnil;
339
819
  }
340
820
  return rb_usascii_str_new_cstr(pathname);
341
821
  }
342
822
 
823
+ /*
824
+ * @overload inspect
825
+ *
826
+ * Returns the value as a string for inspection.
827
+ *
828
+ * @return [String]
829
+ * @private
830
+ */
831
+ static VALUE
832
+ rbflite_voice_inspect(VALUE self) /* Flite::Voice#inspect: "#<Class: name>", "#<Class: name (pathname)>" or "#<Class: not initialized>" */
833
+ {
834
+ rbflite_voice_t *voice = DATA_PTR(self);
835
+ const char *class_name = rb_obj_classname(self);
836
+ const char *voice_name;
837
+ const char *pathname;
838
+
839
+ if (voice->voice == NULL) { /* inspect does not raise on an uninitialized voice */
840
+ return rb_sprintf("#<%s: not initialized>", class_name);
841
+ }
842
+ voice_name = voice->voice->name;
843
+
844
+ pathname = flite_get_param_string(voice->voice->features, "pathname", ""); /* "" is the default when the feature is absent */
845
+ if (pathname[0] == '\0') {
846
+ return rb_sprintf("#<%s: %s>", class_name, voice_name);
847
+ } else {
848
+ return rb_sprintf("#<%s: %s (%s)>", class_name, voice_name, pathname);
849
+ }
850
+ }
851
+
852
+ #ifdef _WIN32
853
+ __declspec(dllexport) void Init_flite(void);
854
+ #endif
855
+
343
856
  void
344
857
  Init_flite(void)
345
858
  {
346
859
  VALUE cmu_flite_version;
347
860
 
348
- id_write = rb_intern("write");
861
+ sym_mp3 = ID2SYM(rb_intern("mp3"));
862
+ sym_raw = ID2SYM(rb_intern("raw"));
863
+ sym_wav = ID2SYM(rb_intern("wav"));
349
864
 
350
865
  rb_mFlite = rb_define_module("Flite");
866
+ rb_eFliteError = rb_define_class_under(rb_mFlite, "Error", rb_eStandardError);
867
+ rb_eFliteRuntimeError = rb_define_class_under(rb_mFlite, "Runtime", rb_eFliteError);
351
868
 
352
869
  cmu_flite_version = rb_usascii_str_new_cstr(FLITE_PROJECT_VERSION);
353
870
  OBJ_FREEZE(cmu_flite_version);
@@ -367,11 +884,15 @@ Init_flite(void)
367
884
  #endif
368
885
 
369
886
  rb_define_singleton_method(rb_mFlite, "list_builtin_voices", flite_s_list_builtin_voices, 0);
887
+ rb_define_singleton_method(rb_mFlite, "supported_audio_types", flite_s_supported_audio_types, 0);
888
+ rb_define_singleton_method(rb_mFlite, "sleep_time_after_speaking=", flite_s_set_sleep_time_after_speaking, 1);
370
889
  rb_cVoice = rb_define_class_under(rb_mFlite, "Voice", rb_cObject);
371
890
  rb_define_alloc_func(rb_cVoice, rbflite_voice_s_allocate);
372
891
 
373
892
  rb_define_method(rb_cVoice, "initialize", rbflite_voice_initialize, -1);
374
- rb_define_method(rb_cVoice, "speech", rbflite_voice_speech, -1);
893
+ rb_define_method(rb_cVoice, "speak", rbflite_voice_speak, 1);
894
+ rb_define_method(rb_cVoice, "to_speech", rbflite_voice_to_speech, -1);
375
895
  rb_define_method(rb_cVoice, "name", rbflite_voice_name, 0);
376
896
  rb_define_method(rb_cVoice, "pathname", rbflite_voice_pathname, 0);
897
+ rb_define_method(rb_cVoice, "inspect", rbflite_voice_inspect, 0);
377
898
  }