flite 0.0.3.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -38,6 +38,13 @@
38
38
  #include "rbflite.h"
39
39
  #include <flite/flite_version.h>
40
40
 
41
/*
 * Two-argument minimum / maximum, defined only when the platform headers
 * have not already provided them.
 *
 * The whole expansion is parenthesized so the macros can be used inside a
 * larger expression (e.g. `1 + MIN(a, b)`).  Each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
47
+
41
48
  #ifdef WORDS_BIGENDIAN
42
49
  #define TO_LE4(num) SWAPINT(num)
43
50
  #defien TO_LE2(num) SWAPSHORT(num)
@@ -46,6 +53,16 @@
46
53
  #define TO_LE2(num) (num)
47
54
  #endif
48
55
 
56
+ #ifdef HAVE_LAME_LAME_H
57
+ #include <lame/lame.h>
58
+ #define HAVE_MP3LAME 1
59
+ #endif
60
+
61
+ #ifdef HAVE_LAME_H
62
+ #include <lame.h>
63
+ #define HAVE_MP3LAME 1
64
+ #endif
65
+
49
66
  #ifdef HAVE_CST_AUDIO_STREAMING_INFO_UTT
50
67
  /* flite 2.0.0 */
51
68
  typedef struct cst_audio_streaming_info_struct *asc_last_arg_t;
@@ -56,6 +73,14 @@ typedef void *asc_last_arg_t;
56
73
  #define ASC_LAST_ARG_TO_USERDATA(last_arg) (last_arg)
57
74
  #endif
58
75
 
76
/*
 * Failure codes stored in voice_speech_data_t.error by the audio callbacks
 * and turned into Ruby exceptions by check_error().
 */
enum rbfile_error {
    RBFLITE_ERROR_SUCCESS = 0,         /* nothing went wrong */
    RBFLITE_ERROR_OUT_OF_MEMORY,       /* a buffer chunk could not be allocated */
    RBFLITE_ERROR_LAME_INIT_PARAMS,    /* lame_init_params() failed */
    RBFLITE_ERROR_LAME_ENCODE_BUFFER,  /* lame_encode_buffer() failed */
    RBFLITE_ERROR_LAME_ENCODE_FLUSH,   /* lame_encode_flush() failed */
};
83
+
59
84
  void usenglish_init(cst_voice *v);
60
85
  cst_lexicon *cmulex_init(void);
61
86
 
@@ -65,39 +90,166 @@ cst_lexicon *cmu_indic_lex_init(void);
65
90
  void cmu_grapheme_lang_init(cst_voice *v);
66
91
  cst_lexicon *cmu_grapheme_lex_init(void);
67
92
 
93
+ typedef struct thread_queue_entry {
94
+ struct thread_queue_entry *next;
95
+ VALUE thread;
96
+ } thread_queue_entry_t;
97
+
98
+ typedef struct {
99
+ thread_queue_entry_t *head;
100
+ thread_queue_entry_t **tail;
101
+ } thread_queue_t;
102
+
68
103
  typedef struct {
69
104
  cst_voice *voice;
105
+ thread_queue_t queue;
70
106
  } rbflite_voice_t;
71
107
 
108
/* Smallest allocation made by buffer_list_alloc(), chunk header included. */
#define MIN_BUFFER_LIST_SIZE (64 * 1024)

/*
 * One chunk in a singly linked chain of byte buffers.  The chunk is allocated
 * larger than the struct; `buf` really holds `size` bytes.
 */
struct buffer_list {
    struct buffer_list *next;  /* following chunk, or NULL for the last one */
    size_t size;               /* capacity of buf in bytes */
    size_t used;               /* number of bytes of buf already filled */
    char buf[1];               /* start of the payload area */
};
typedef struct buffer_list buffer_list_t;
115
+
72
116
  typedef struct {
73
117
  cst_voice *voice;
74
118
  const char *text;
75
119
  const char *outtype;
76
- VALUE io;
77
- int state;
78
- } voice_speech_arg_t;
120
+ void *encoder;
121
+ buffer_list_t *buffer_list;
122
+ buffer_list_t *buffer_list_last;
123
+ enum rbfile_error error;
124
+ } voice_speech_data_t;
79
125
 
80
126
  typedef struct {
81
- VALUE io;
82
- void *data;
83
- long size;
84
- } io_write_arg_t;
127
+ cst_audio_stream_callback asc;
128
+ void *(*encoder_init)(VALUE opts);
129
+ void (*encoder_fini)(void *encoder);
130
+ } audio_stream_encoder_t;
85
131
 
86
132
  static VALUE rb_mFlite;
133
+ static VALUE rb_eFliteError;
134
+ static VALUE rb_eFliteRuntimeError;
87
135
  static VALUE rb_cVoice;
88
- static ID id_write;
136
+ static VALUE sym_mp3;
137
+ static VALUE sym_raw;
138
+ static VALUE sym_wav;
139
+ static struct timeval sleep_time_after_speaking;
140
+
141
+ static buffer_list_t *buffer_list_alloc(size_t size);
142
+ static void check_error(voice_speech_data_t *vsd);
143
+
144
+ static void lock_thread(thread_queue_t *queue, thread_queue_entry_t *entry)
145
+ {
146
+ /* enqueue the current thread to voice->queue. */
147
+ entry->next = NULL;
148
+ *queue->tail = entry;
149
+ queue->tail = &entry->next;
150
+ if (queue->head != entry) {
151
+ /* stop the current thread if other threads run. */
152
+ entry->thread = rb_thread_current();
153
+ rb_thread_stop();
154
+ }
155
+ }
156
+
157
+ static void unlock_thread(thread_queue_t *queue)
158
+ {
159
+ /* dequeue the current thread from voice->queue. */
160
+ queue->head = queue->head->next;
161
+ if (queue->head == NULL) {
162
+ queue->tail = &queue->head;
163
+ } else {
164
+ /* resume the top of blocked threads. */
165
+ rb_thread_wakeup_alive(queue->head->thread);
166
+ }
167
+ }
168
+
169
+ static int add_data(voice_speech_data_t *vsd, const void *data, size_t size)
170
+ {
171
+ buffer_list_t *list;
172
+ size_t rest;
173
+
174
+ if (vsd->buffer_list == NULL) {
175
+ list = buffer_list_alloc(size);
176
+ if (list == NULL) {
177
+ vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
178
+ return -1;
179
+ }
180
+ vsd->buffer_list = vsd->buffer_list_last = list;
181
+ }
182
+ list = vsd->buffer_list_last;
183
+ rest = list->size - list->used;
184
+ if (size <= rest) {
185
+ memcpy(list->buf + list->used, data, size);
186
+ list->used += size;
187
+ } else {
188
+ memcpy(list->buf + list->used, data, rest);
189
+ list->used += rest;
190
+ data = (const char*)data + rest;
191
+ size -= rest;
192
+ list = buffer_list_alloc(size);
193
+ if (list == NULL) {
194
+ vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
195
+ return -1;
196
+ }
197
+ memcpy(list->buf, data, size);
198
+ list->used = size;
199
+ vsd->buffer_list_last->next = list;
200
+ vsd->buffer_list_last = list;
201
+ }
202
+ return 0;
203
+ }
204
+
205
+ static buffer_list_t *buffer_list_alloc(size_t size)
206
+ {
207
+ size_t alloc_size = MAX(size + offsetof(buffer_list_t, buf), MIN_BUFFER_LIST_SIZE);
208
+ buffer_list_t *list = xmalloc(alloc_size);
209
+
210
+ if (list == NULL) {
211
+ return NULL;
212
+ }
213
+ list->next = NULL;
214
+ list->size = alloc_size - offsetof(buffer_list_t, buf);
215
+ list->used = 0;
216
+ return list;
217
+ }
218
+
219
+ static void check_error(voice_speech_data_t *vsd)
220
+ {
221
+ buffer_list_t *list, *list_next;
222
+
223
+ if (vsd->error == RBFLITE_ERROR_SUCCESS) {
224
+ return;
225
+ }
226
+ for (list = vsd->buffer_list; list != NULL; list = list_next) {
227
+ list_next = list->next;
228
+ xfree(list);
229
+ }
230
+ vsd->buffer_list = NULL;
231
+ switch (vsd->error) {
232
+ case RBFLITE_ERROR_OUT_OF_MEMORY:
233
+ rb_raise(rb_eNoMemError, "out of memory while writing speech data");
234
+ case RBFLITE_ERROR_LAME_INIT_PARAMS:
235
+ rb_raise(rb_eFliteRuntimeError, "lame_init_params() error");
236
+ case RBFLITE_ERROR_LAME_ENCODE_BUFFER:
237
+ rb_raise(rb_eFliteRuntimeError, "lame_encode_buffer() error");
238
+ case RBFLITE_ERROR_LAME_ENCODE_FLUSH:
239
+ rb_raise(rb_eFliteRuntimeError, "lame_encode_flush() error");
240
+ default:
241
+ rb_raise(rb_eFliteRuntimeError, "Unkown error %d", vsd->error);
242
+ }
243
+ }
89
244
 
90
245
  /*
91
- * call graph:
246
+ * Returns builtin voice names.
92
247
  *
93
- * rbflite_audio_write_cb()
94
- * --> rbfile_io_write_protect() via rb_thread_call_with_gvl()
95
- * --> rbfile_io_write() via rb_protect()
248
+ * @example
249
+ * Flite.list_builtin_voices # => ["kal", "awb_time", "kal16", "awb", "rms", "slt"]
250
+ *
251
+ * @return [Array]
96
252
  */
97
- static int rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg);
98
- static void *rbfile_io_write_protect(void *data);
99
- static VALUE rbfile_io_write(VALUE data);
100
-
101
253
  static VALUE
102
254
  flite_s_list_builtin_voices(VALUE klass)
103
255
  {
@@ -112,6 +264,53 @@ flite_s_list_builtin_voices(VALUE klass)
112
264
  return ary;
113
265
  }
114
266
 
267
+ /*
268
+ * Returns supported audio types used as the second argument of {Flite::Voice#to_speech}.
269
+ *
270
+ * @example
271
+ * # Compiled with mp3 support
272
+ * Flite.supported_audio_types # => [:wav, :raw, :mp3]
273
+ *
274
+ * # Compiled without mp3 support
275
+ * Flite.supported_audio_types # => [:wav, :raw]
276
+ *
277
+ * @return [Array]
278
+ */
279
+ static VALUE
280
+ flite_s_supported_audio_types(VALUE klass)
281
+ {
282
+ VALUE ary = rb_ary_new();
283
+
284
+ rb_ary_push(ary, sym_wav);
285
+ rb_ary_push(ary, sym_raw);
286
+ #ifdef HAVE_MP3LAME
287
+ rb_ary_push(ary, sym_mp3);
288
+ #endif
289
+ return ary;
290
+ }
291
+
292
+ /*
293
+ * @overload sleep_time_after_speaking=(sec)
294
+ *
295
+ * Sets sleep time after {Flite::Voice#speak}.
296
+ * The default value is 0 on Unix and 0.3 on Windows.
297
+ *
298
+ * This is a workaround for voice cutoff on Windows.
299
+ * The following code speaks "Hello Wor.. Hello World" without
300
+ * 0.3 seconds sleep.
301
+ *
302
+ * "Hello World".speak # The last 0.3 seconds are cut off by the next speech on Windows.
303
+ * "Hello World".speak
304
+ *
305
+ * @param [Float] sec seconds to sleep
306
+ */
307
+ static VALUE
308
+ flite_s_set_sleep_time_after_speaking(VALUE klass, VALUE val)
309
+ {
310
+ sleep_time_after_speaking = rb_time_interval(val);
311
+ return val;
312
+ }
313
+
115
314
  static void
116
315
  rbfile_voice_free(rbflite_voice_t *voice)
117
316
  {
@@ -125,8 +324,10 @@ static VALUE
125
324
  rbflite_voice_s_allocate(VALUE klass)
126
325
  {
127
326
  rbflite_voice_t *voice;
327
+ VALUE obj = Data_Make_Struct(klass, rbflite_voice_t, NULL, rbfile_voice_free, voice);
128
328
 
129
- return Data_Make_Struct(klass, rbflite_voice_t, NULL, rbfile_voice_free, voice);
329
+ voice->queue.tail = &voice->queue.head;
330
+ return obj;
130
331
  }
131
332
 
132
333
  #ifdef HAVE_FLITE_VOICE_LOAD
@@ -137,6 +338,28 @@ rbflite_voice_load(void *data)
137
338
  }
138
339
  #endif
139
340
 
341
+ /*
342
+ * @overload initialize(name = nil)
343
+ *
344
+ * Create a new voice specified by <code>name</code>.
345
+ * If <code>name</code> includes '.' or '/' and ruby flite
346
+ * is compiled for CMU Flite 2.0.0 or later, try to
347
+ * use a loadable voice.
348
+ *
349
+ * @example
350
+ *
351
+ * # Use default voice. It is 'kal' usually.
352
+ * voice = Flite::Voice.new
353
+ *
354
+ * # Use a builtin voice.
355
+ * voice = Flite::Voice.new('awb')
356
+ *
357
+ * # Use a loadable voice.
358
+ * voice = Flite::Voice.new('/path/to/cmu_us_gka.flitevox')
359
+ *
360
+ * @param [String] name
361
+ * @see Flite.list_builtin_voices
362
+ */
140
363
  static VALUE
141
364
  rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
142
365
  {
@@ -174,18 +397,15 @@ rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
174
397
  static void *
175
398
  voice_speech_without_gvl(void *data)
176
399
  {
177
- voice_speech_arg_t *arg = (voice_speech_arg_t *)data;
178
- flite_text_to_speech(arg->text, arg->voice, arg->outtype);
400
+ voice_speech_data_t *vsd = (voice_speech_data_t *)data;
401
+ flite_text_to_speech(vsd->text, vsd->voice, vsd->outtype);
179
402
  return NULL;
180
403
  }
181
404
 
182
405
  static int
183
- rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
406
+ wav_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
184
407
  {
185
- voice_speech_arg_t *ud = (voice_speech_arg_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
186
- io_write_arg_t arg;
187
-
188
- arg.io = ud->io;
408
+ voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
189
409
 
190
410
  if (start == 0) {
191
411
  /* write WAVE file header. */
@@ -227,103 +447,363 @@ rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_las
227
447
  header.bitswidth = TO_LE2(sizeof(short) * 8);
228
448
  header.data_size = TO_LE4(data_size);
229
449
 
230
- arg.data = &header;
231
- arg.size = sizeof(header);
232
- ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
233
- if (ud->state != 0) {
450
+ if (add_data(vsd, &header, sizeof(header)) != 0) {
234
451
  return CST_AUDIO_STREAM_STOP;
235
452
  }
236
453
  }
237
454
 
238
- arg.data = &w->samples[start];
239
- arg.size = size * sizeof(short);
240
- ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
241
- if (ud->state != 0) {
455
+ if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) {
242
456
  return CST_AUDIO_STREAM_STOP;
243
457
  }
458
+ return CST_AUDIO_STREAM_CONT;
459
+ }
460
+
461
+ static audio_stream_encoder_t wav_encoder = {
462
+ wav_encoder_cb,
463
+ NULL,
464
+ NULL,
465
+ };
244
466
 
467
+ static int
468
+ raw_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
469
+ {
470
+ voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
471
+
472
+ if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) {
473
+ return CST_AUDIO_STREAM_STOP;
474
+ }
245
475
  return CST_AUDIO_STREAM_CONT;
246
476
  }
247
477
 
248
- static void *
249
- rbfile_io_write_protect(void *data)
478
+ static audio_stream_encoder_t raw_encoder = {
479
+ raw_encoder_cb,
480
+ NULL,
481
+ NULL,
482
+ };
483
+
484
+ #ifdef HAVE_MP3LAME
485
+
486
+ #define MAX_SAMPLE_SIZE 1024
487
+ /* "mp3buf_size in bytes = 1.25*num_samples + 7200" according to lame.h. */
488
+ #define MP3BUF_SIZE (MAX_SAMPLE_SIZE + MAX_SAMPLE_SIZE / 4 + 7200)
489
+ static int mp3_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
490
+ {
491
+ voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
492
+ lame_global_flags *gf = vsd->encoder;
493
+ unsigned char mp3buf[MP3BUF_SIZE];
494
+ short *sptr = &w->samples[start];
495
+ short *eptr = sptr + size;
496
+ int rv;
497
+
498
+ if (start == 0) {
499
+ lame_set_num_samples(gf, cst_wave_num_samples(w));
500
+ lame_set_in_samplerate(gf, cst_wave_sample_rate(w));
501
+ lame_set_num_channels(gf, 1);
502
+ lame_set_mode(gf, MONO);
503
+ rv = lame_init_params(gf);
504
+ if (rv == -1) {
505
+ vsd->error = RBFLITE_ERROR_LAME_INIT_PARAMS;
506
+ return CST_AUDIO_STREAM_STOP;
507
+ }
508
+ }
509
+ while (eptr - sptr > MAX_SAMPLE_SIZE) {
510
+ rv = lame_encode_buffer(gf, sptr, NULL, MAX_SAMPLE_SIZE, mp3buf, sizeof(mp3buf));
511
+ if (rv < 0) {
512
+ vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
513
+ return CST_AUDIO_STREAM_STOP;
514
+ }
515
+ if (rv > 0) {
516
+ if (add_data(vsd, mp3buf, rv) != 0) {
517
+ return CST_AUDIO_STREAM_STOP;
518
+ }
519
+ }
520
+ sptr += MAX_SAMPLE_SIZE;
521
+ }
522
+ rv = lame_encode_buffer(gf, sptr, NULL, eptr - sptr, mp3buf, sizeof(mp3buf));
523
+ if (rv < 0) {
524
+ vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
525
+ return CST_AUDIO_STREAM_STOP;
526
+ }
527
+ if (rv > 0) {
528
+ if (add_data(vsd, mp3buf, rv) != 0) {
529
+ return CST_AUDIO_STREAM_STOP;
530
+ }
531
+ }
532
+ if (last) {
533
+ rv = lame_encode_flush(gf, mp3buf, sizeof(mp3buf));
534
+ if (rv < 0) {
535
+ vsd->error = RBFLITE_ERROR_LAME_ENCODE_FLUSH;
536
+ return CST_AUDIO_STREAM_STOP;
537
+ }
538
+ if (rv > 0) {
539
+ if (add_data(vsd, mp3buf, rv) != 0) {
540
+ return CST_AUDIO_STREAM_STOP;
541
+ }
542
+ }
543
+ }
544
+ return CST_AUDIO_STREAM_CONT;
545
+ }
546
+
547
+ static void *mp3_encoder_init(VALUE opts)
250
548
  {
251
- int state = 0;
252
- rb_protect(rbfile_io_write, (VALUE)data, &state);
253
- return (void*)(VALUE)state;
549
+ lame_global_flags *gf = lame_init();
550
+
551
+ if (gf == NULL) {
552
+ rb_raise(rb_eFliteRuntimeError, "Failed to initialize lame");
553
+ }
554
+
555
+ lame_set_bWriteVbrTag(gf, 0);
556
+ lame_set_brate(gf, 64);
557
+
558
+ if (!NIL_P(opts)) {
559
+ VALUE v;
560
+ Check_Type(opts, T_HASH);
561
+
562
+ v = rb_hash_aref(opts, ID2SYM(rb_intern("bitrate")));
563
+ if (!NIL_P(v)) {
564
+ lame_set_brate(gf, NUM2INT(v));
565
+ }
566
+
567
+ v = rb_hash_aref(opts, ID2SYM(rb_intern("scale")));
568
+ if (!NIL_P(v)) {
569
+ lame_set_scale(gf, NUM2INT(v));
570
+ }
571
+
572
+ v = rb_hash_aref(opts, ID2SYM(rb_intern("quality")));
573
+ if (!NIL_P(v)) {
574
+ lame_set_quality(gf, NUM2INT(v));
575
+ }
576
+ }
577
+
578
+ lame_set_bWriteVbrTag(gf, 0);
579
+ return gf;
580
+ }
581
+
582
+ static void mp3_encoder_fini(void *encoder)
583
+ {
584
+ lame_close(encoder);
254
585
  }
255
586
 
587
+ static audio_stream_encoder_t mp3_encoder = {
588
+ mp3_encoder_cb,
589
+ mp3_encoder_init,
590
+ mp3_encoder_fini,
591
+ };
592
+
593
+ #endif
594
+
595
+ /*
596
+ * @overload speak(text)
597
+ *
598
+ * Speak the <code>text</code>.
599
+ *
600
+ * @example
601
+ * voice = Flite::Voice.new
602
+ *
603
+ * # Speak 'Hello Flite World!'
604
+ * voice.speak('Hello Flite World!')
605
+ *
606
+ * @param [String] text
607
+ */
256
608
  static VALUE
257
- rbfile_io_write(VALUE data)
609
+ rbflite_voice_speak(VALUE self, VALUE text)
258
610
  {
259
- const io_write_arg_t *arg = (const io_write_arg_t *)data;
260
- rb_funcall(arg->io, id_write, 1, rb_str_new(arg->data, arg->size));
261
- return Qnil;
611
+ rbflite_voice_t *voice = DATA_PTR(self);
612
+ voice_speech_data_t vsd;
613
+ thread_queue_entry_t entry;
614
+
615
+ if (voice->voice == NULL) {
616
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
617
+ }
618
+
619
+ vsd.voice = voice->voice;
620
+ vsd.text = StringValueCStr(text);
621
+ vsd.outtype = "play";
622
+ vsd.buffer_list = NULL;
623
+ vsd.buffer_list_last = NULL;
624
+ vsd.error = RBFLITE_ERROR_SUCCESS;
625
+
626
+ lock_thread(&voice->queue, &entry);
627
+
628
+ rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
629
+ RB_GC_GUARD(text);
630
+
631
+ unlock_thread(&voice->queue);
632
+
633
+ check_error(&vsd);
634
+
635
+ if (sleep_time_after_speaking.tv_sec != 0 || sleep_time_after_speaking.tv_usec != 0) {
636
+ rb_thread_wait_for(sleep_time_after_speaking);
637
+ }
638
+
639
+ return self;
262
640
  }
263
641
 
642
+ /*
643
+ * @overload to_speech(text, audio_type = :wav, opts = {})
644
+ *
645
+ * Converts <code>text</code> to audio data.
646
+ *
647
+ * @example
648
+ * voice = Flite::Voice.new
649
+ *
650
+ * # Save speech as wav
651
+ * File.binwrite('hello_flite_world.wav',
652
+ * voice.to_speech('Hello Flite World!'))
653
+ *
654
+ * # Save speech as raw pcm (signed 16 bit little endian, rate 8000 Hz, mono)
655
+ * File.binwrite('hello_flite_world.raw',
656
+ * voice.to_speech('Hello Flite World!', :raw))
657
+ *
658
+ * # Save speech as mp3
659
+ * File.binwrite('hello_flite_world.mp3',
660
+ * voice.to_speech('Hello Flite World!', :mp3))
661
+ *
662
+ * # Save speech as mp3 whose bitrate is 128k.
663
+ * File.binwrite('hello_flite_world.mp3',
664
+ * voice.to_speech('Hello Flite World!', :mp3, :bitrate => 128))
665
+ *
666
+ * @param [String] text
667
+ * @param [Symbol] audio_type :wav, :raw or :mp3 (when mp3 support is enabled)
668
+ * @param [Hash] opts audio encoder options
669
+ * @return [String] audio data
670
+ * @see Flite.supported_audio_types
671
+ */
264
672
  static VALUE
265
- rbflite_voice_speech(int argc, VALUE *argv, VALUE self)
673
+ rbflite_voice_to_speech(int argc, VALUE *argv, VALUE self)
266
674
  {
267
675
  rbflite_voice_t *voice = DATA_PTR(self);
268
676
  VALUE text;
269
- VALUE out;
677
+ VALUE audio_type;
678
+ VALUE opts;
270
679
  cst_audio_streaming_info *asi = NULL;
271
- voice_speech_arg_t arg;
680
+ audio_stream_encoder_t *encoder;
681
+ voice_speech_data_t vsd;
682
+ thread_queue_entry_t entry;
683
+ buffer_list_t *list, *list_next;
684
+ size_t size;
685
+ VALUE speech_data;
686
+ char *ptr;
272
687
 
273
688
  if (voice->voice == NULL) {
274
- rb_raise(rb_eRuntimeError, "not initialized");
275
- }
276
-
277
- rb_scan_args(argc, argv, "11", &text, &out);
278
- arg.voice = voice->voice;
279
- arg.text = StringValueCStr(text);
280
- arg.io = Qnil;
281
- arg.state = 0;
282
-
283
- if (NIL_P(out)) {
284
- /* play audio */
285
- arg.outtype = "play";
286
- } else if (rb_respond_to(out, id_write)) {
287
- /* write to an object */
288
- asi = new_audio_streaming_info();
289
- if (asi == NULL) {
290
- rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
291
- }
292
- asi->asc = rbflite_audio_write_cb;
293
- asi->userdata = (void*)&arg;
294
- feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
295
- arg.outtype = "stream";
296
- arg.io = out;
689
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
690
+ }
691
+
692
+ rb_scan_args(argc, argv, "12", &text, &audio_type, &opts);
693
+
694
+ if (NIL_P(audio_type)) {
695
+ encoder = &wav_encoder;
297
696
  } else {
298
- /* write to a file */
299
- out = rb_str_export_to_enc(out, rb_filesystem_encoding());
300
- arg.outtype = StringValueCStr(out);
697
+ if (rb_equal(audio_type, sym_wav)) {
698
+ encoder = &wav_encoder;
699
+ } else if (rb_equal(audio_type, sym_raw)) {
700
+ encoder = &raw_encoder;
701
+ #ifdef HAVE_MP3LAME
702
+ } else if (rb_equal(audio_type, sym_mp3)) {
703
+ encoder = &mp3_encoder;
704
+ #endif
705
+ } else {
706
+ rb_raise(rb_eArgError, "unknown audio type");
707
+ }
301
708
  }
302
709
 
303
- rb_thread_call_without_gvl(voice_speech_without_gvl, &arg, NULL, NULL);
304
- RB_GC_GUARD(text);
305
- RB_GC_GUARD(out);
710
+ vsd.voice = voice->voice;
711
+ vsd.text = StringValueCStr(text);
712
+ vsd.outtype = "stream";
713
+ vsd.encoder = NULL;
714
+ vsd.buffer_list = NULL;
715
+ vsd.buffer_list_last = NULL;
716
+ vsd.error = RBFLITE_ERROR_SUCCESS;
306
717
 
307
- if (asi != NULL) {
308
- flite_feat_remove(voice->voice->features, "streaming_info");
309
- if (arg.state != 0) {
310
- rb_jump_tag(arg.state);
718
+ if (encoder->encoder_init) {
719
+ vsd.encoder = encoder->encoder_init(opts);
720
+ }
721
+
722
+ /* write to an object */
723
+ asi = new_audio_streaming_info();
724
+ if (asi == NULL) {
725
+ if (encoder->encoder_fini) {
726
+ encoder->encoder_fini(vsd.encoder);
311
727
  }
728
+ rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
312
729
  }
313
- return self;
730
+ asi->asc = encoder->asc;
731
+ asi->userdata = (void*)&vsd;
732
+
733
+ lock_thread(&voice->queue, &entry);
734
+
735
+ flite_feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
736
+ rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
737
+ flite_feat_remove(voice->voice->features, "streaming_info");
738
+ RB_GC_GUARD(text);
739
+
740
+ unlock_thread(&voice->queue);
741
+
742
+ if (encoder->encoder_fini) {
743
+ encoder->encoder_fini(vsd.encoder);
744
+ }
745
+
746
+ check_error(&vsd);
747
+
748
+ size = 0;
749
+ for (list = vsd.buffer_list; list != NULL; list = list->next) {
750
+ size += list->used;
751
+ }
752
+ speech_data = rb_str_buf_new(size);
753
+ ptr = RSTRING_PTR(speech_data);
754
+ for (list = vsd.buffer_list; list != NULL; list = list_next) {
755
+ memcpy(ptr, list->buf, list->used);
756
+ ptr += list->used;
757
+ list_next = list->next;
758
+ xfree(list);
759
+ }
760
+ rb_str_set_len(speech_data, size);
761
+
762
+ return speech_data;
314
763
  }
315
764
 
765
+ /*
766
+ * @overload name
767
+ *
768
+ * Returns voice name.
769
+ *
770
+ * @example
771
+ * voice = Flite::Voice.new('slt')
772
+ * voice.name => 'slt'
773
+ *
774
+ * # voice loading is a new feature of CMU Flite 2.0.0.
775
+ * voice = Flite::Voice.new('/path/to/cmu_us_fem.flitevox')
776
+ * voice.name => 'cmu_us_fem'
777
+ *
778
+ * @return [String]
779
+ */
316
780
  static VALUE
317
781
  rbflite_voice_name(VALUE self)
318
782
  {
319
783
  rbflite_voice_t *voice = DATA_PTR(self);
320
784
 
321
785
  if (voice->voice == NULL) {
322
- rb_raise(rb_eRuntimeError, "not initialized");
786
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
323
787
  }
324
788
  return rb_usascii_str_new_cstr(voice->voice->name);
325
789
  }
326
790
 
791
+ /*
792
+ * @overload pathname
793
+ *
794
+ * Returns the path of the voice if the voice is a loadable voice.
795
+ * Otherwise, nil.
796
+ *
797
+ * @example
798
+ * voice = Flite::Voice.new
799
+ * voice.pathname => nil
800
+ *
801
+ * # voice loading is a new feature of CMU Flite 2.0.0.
802
+ * voice = Flite::Voice.new('/path/to/cmu_us_aup.flitevox')
803
+ * voice.pathname => '/path/to/cmu_us_aup.flitevox'
804
+ *
805
+ * @return [String, nil]
806
+ */
327
807
  static VALUE
328
808
  rbflite_voice_pathname(VALUE self)
329
809
  {
@@ -331,23 +811,60 @@ rbflite_voice_pathname(VALUE self)
331
811
  const char *pathname;
332
812
 
333
813
  if (voice->voice == NULL) {
334
- rb_raise(rb_eRuntimeError, "not initialized");
814
+ rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
335
815
  }
336
- pathname = get_param_string(voice->voice->features, "pathname", "");
816
+ pathname = flite_get_param_string(voice->voice->features, "pathname", "");
337
817
  if (pathname[0] == '\0') {
338
818
  return Qnil;
339
819
  }
340
820
  return rb_usascii_str_new_cstr(pathname);
341
821
  }
342
822
 
823
+ /*
824
+ * @overload inspect
825
+ *
826
+ * Returns the value as a string for inspection.
827
+ *
828
+ * @return [String]
829
+ * @private
830
+ */
831
+ static VALUE
832
+ rbflite_voice_inspect(VALUE self)
833
+ {
834
+ rbflite_voice_t *voice = DATA_PTR(self);
835
+ const char *class_name = rb_obj_classname(self);
836
+ const char *voice_name;
837
+ const char *pathname;
838
+
839
+ if (voice->voice == NULL) {
840
+ return rb_sprintf("#<%s: not initialized>", class_name);
841
+ }
842
+ voice_name = voice->voice->name;
843
+
844
+ pathname = flite_get_param_string(voice->voice->features, "pathname", "");
845
+ if (pathname[0] == '\0') {
846
+ return rb_sprintf("#<%s: %s>", class_name, voice_name);
847
+ } else {
848
+ return rb_sprintf("#<%s: %s (%s)>", class_name, voice_name, pathname);
849
+ }
850
+ }
851
+
852
+ #ifdef _WIN32
853
+ __declspec(dllexport) void Init_flite(void);
854
+ #endif
855
+
343
856
  void
344
857
  Init_flite(void)
345
858
  {
346
859
  VALUE cmu_flite_version;
347
860
 
348
- id_write = rb_intern("write");
861
+ sym_mp3 = ID2SYM(rb_intern("mp3"));
862
+ sym_raw = ID2SYM(rb_intern("raw"));
863
+ sym_wav = ID2SYM(rb_intern("wav"));
349
864
 
350
865
  rb_mFlite = rb_define_module("Flite");
866
+ rb_eFliteError = rb_define_class_under(rb_mFlite, "Error", rb_eStandardError);
867
+ rb_eFliteRuntimeError = rb_define_class_under(rb_mFlite, "Runtime", rb_eFliteError);
351
868
 
352
869
  cmu_flite_version = rb_usascii_str_new_cstr(FLITE_PROJECT_VERSION);
353
870
  OBJ_FREEZE(cmu_flite_version);
@@ -367,11 +884,15 @@ Init_flite(void)
367
884
  #endif
368
885
 
369
886
  rb_define_singleton_method(rb_mFlite, "list_builtin_voices", flite_s_list_builtin_voices, 0);
887
+ rb_define_singleton_method(rb_mFlite, "supported_audio_types", flite_s_supported_audio_types, 0);
888
+ rb_define_singleton_method(rb_mFlite, "sleep_time_after_speaking=", flite_s_set_sleep_time_after_speaking, 1);
370
889
  rb_cVoice = rb_define_class_under(rb_mFlite, "Voice", rb_cObject);
371
890
  rb_define_alloc_func(rb_cVoice, rbflite_voice_s_allocate);
372
891
 
373
892
  rb_define_method(rb_cVoice, "initialize", rbflite_voice_initialize, -1);
374
- rb_define_method(rb_cVoice, "speech", rbflite_voice_speech, -1);
893
+ rb_define_method(rb_cVoice, "speak", rbflite_voice_speak, 1);
894
+ rb_define_method(rb_cVoice, "to_speech", rbflite_voice_to_speech, -1);
375
895
  rb_define_method(rb_cVoice, "name", rbflite_voice_name, 0);
376
896
  rb_define_method(rb_cVoice, "pathname", rbflite_voice_pathname, 0);
897
+ rb_define_method(rb_cVoice, "inspect", rbflite_voice_inspect, 0);
377
898
  }