flite 0.0.3.1-x86-mingw32 → 0.1.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +97 -72
- data/bin/{saytime.rb → saytime} +19 -1
- data/bin/speaking-web-server +121 -0
- data/ext/flite/extconf.rb +40 -3
- data/ext/flite/rbflite.c +604 -83
- data/ext/flite/rbflite.h +10 -0
- data/ext/flite/win32_binary_gem.c +432 -0
- data/flite.gemspec +7 -3
- data/lib/flite.dll +0 -0
- data/lib/flite.rb +45 -2
- data/lib/flite/version.rb +1 -1
- data/lib/flite_200.so +0 -0
- data/lib/flite_210.so +0 -0
- data/lib/flite_cmu_grapheme_lang.dll +0 -0
- data/lib/flite_cmu_grapheme_lex.dll +0 -0
- data/lib/flite_cmu_indic_lang.dll +0 -0
- data/lib/flite_cmu_indic_lex.dll +0 -0
- data/lib/flite_cmu_time_awb.dll +0 -0
- data/lib/flite_cmu_us_awb.dll +0 -0
- data/lib/flite_cmu_us_kal.dll +0 -0
- data/lib/flite_cmu_us_kal16.dll +0 -0
- data/lib/flite_cmu_us_rms.dll +0 -0
- data/lib/flite_cmu_us_slt.dll +0 -0
- data/lib/flite_cmulex.dll +0 -0
- data/lib/flite_usenglish.dll +0 -0
- data/lib/libmp3lame-0.dll +0 -0
- metadata +23 -5
data/ext/flite/rbflite.c
CHANGED
@@ -38,6 +38,13 @@
|
|
38
38
|
#include "rbflite.h"
|
39
39
|
#include <flite/flite_version.h>
|
40
40
|
|
41
|
+
#ifndef MIN
|
42
|
+
#define MIN(a, b) (((a) < (b)) ? (a) : (b)) /* whole expansion parenthesized so it is safe inside larger expressions; each argument may be evaluated twice */
|
43
|
+
#endif
|
44
|
+
#ifndef MAX
|
45
|
+
#define MAX(a, b) (((a) > (b)) ? (a) : (b)) /* whole expansion parenthesized so it is safe inside larger expressions; each argument may be evaluated twice */
|
46
|
+
#endif
|
47
|
+
|
41
48
|
#ifdef WORDS_BIGENDIAN
|
42
49
|
#define TO_LE4(num) SWAPINT(num)
|
43
50
|
#define TO_LE2(num) SWAPSHORT(num) /* WORDS_BIGENDIAN build: 16-bit values go through SWAPSHORT, mirroring TO_LE4/SWAPINT */
|
@@ -46,6 +53,16 @@
|
|
46
53
|
#define TO_LE2(num) (num)
|
47
54
|
#endif
|
48
55
|
|
56
|
+
#ifdef HAVE_LAME_LAME_H
|
57
|
+
#include <lame/lame.h>
|
58
|
+
#define HAVE_MP3LAME 1
|
59
|
+
#endif
|
60
|
+
|
61
|
+
#ifdef HAVE_LAME_H
|
62
|
+
#include <lame.h>
|
63
|
+
#define HAVE_MP3LAME 1
|
64
|
+
#endif
|
65
|
+
|
49
66
|
#ifdef HAVE_CST_AUDIO_STREAMING_INFO_UTT
|
50
67
|
/* flite 2.0.0 */
|
51
68
|
typedef struct cst_audio_streaming_info_struct *asc_last_arg_t;
|
@@ -56,6 +73,14 @@ typedef void *asc_last_arg_t;
|
|
56
73
|
#define ASC_LAST_ARG_TO_USERDATA(last_arg) (last_arg)
|
57
74
|
#endif
|
58
75
|
|
76
|
+
enum rbfile_error {
|
77
|
+
RBFLITE_ERROR_SUCCESS,
|
78
|
+
RBFLITE_ERROR_OUT_OF_MEMORY,
|
79
|
+
RBFLITE_ERROR_LAME_INIT_PARAMS,
|
80
|
+
RBFLITE_ERROR_LAME_ENCODE_BUFFER,
|
81
|
+
RBFLITE_ERROR_LAME_ENCODE_FLUSH,
|
82
|
+
};
|
83
|
+
|
59
84
|
void usenglish_init(cst_voice *v);
|
60
85
|
cst_lexicon *cmulex_init(void);
|
61
86
|
|
@@ -65,39 +90,166 @@ cst_lexicon *cmu_indic_lex_init(void);
|
|
65
90
|
void cmu_grapheme_lang_init(cst_voice *v);
|
66
91
|
cst_lexicon *cmu_grapheme_lex_init(void);
|
67
92
|
|
93
|
+
typedef struct thread_queue_entry {
|
94
|
+
struct thread_queue_entry *next;
|
95
|
+
VALUE thread;
|
96
|
+
} thread_queue_entry_t;
|
97
|
+
|
98
|
+
typedef struct {
|
99
|
+
thread_queue_entry_t *head;
|
100
|
+
thread_queue_entry_t **tail;
|
101
|
+
} thread_queue_t;
|
102
|
+
|
68
103
|
typedef struct {
|
69
104
|
cst_voice *voice;
|
105
|
+
thread_queue_t queue;
|
70
106
|
} rbflite_voice_t;
|
71
107
|
|
108
|
+
#define MIN_BUFFER_LIST_SIZE (64 * 1024) /* minimum total allocation, in bytes, for one buffer_list_t node (header included) */
|
109
|
+
typedef struct buffer_list {
|
110
|
+
struct buffer_list *next;
|
111
|
+
size_t size;
|
112
|
+
size_t used;
|
113
|
+
char buf[1];
|
114
|
+
} buffer_list_t;
|
115
|
+
|
72
116
|
typedef struct {
|
73
117
|
cst_voice *voice;
|
74
118
|
const char *text;
|
75
119
|
const char *outtype;
|
76
|
-
|
77
|
-
|
78
|
-
|
120
|
+
void *encoder;
|
121
|
+
buffer_list_t *buffer_list;
|
122
|
+
buffer_list_t *buffer_list_last;
|
123
|
+
enum rbfile_error error;
|
124
|
+
} voice_speech_data_t;
|
79
125
|
|
80
126
|
typedef struct {
|
81
|
-
|
82
|
-
void *
|
83
|
-
|
84
|
-
}
|
127
|
+
cst_audio_stream_callback asc;
|
128
|
+
void *(*encoder_init)(VALUE opts);
|
129
|
+
void (*encoder_fini)(void *encoder);
|
130
|
+
} audio_stream_encoder_t;
|
85
131
|
|
86
132
|
static VALUE rb_mFlite;
|
133
|
+
static VALUE rb_eFliteError;
|
134
|
+
static VALUE rb_eFliteRuntimeError;
|
87
135
|
static VALUE rb_cVoice;
|
88
|
-
static
|
136
|
+
static VALUE sym_mp3;
|
137
|
+
static VALUE sym_raw;
|
138
|
+
static VALUE sym_wav;
|
139
|
+
static struct timeval sleep_time_after_speaking;
|
140
|
+
|
141
|
+
static buffer_list_t *buffer_list_alloc(size_t size);
|
142
|
+
static void check_error(voice_speech_data_t *vsd);
|
143
|
+
|
144
|
+
static void lock_thread(thread_queue_t *queue, thread_queue_entry_t *entry)
|
145
|
+
{
|
146
|
+
/* enqueue the current thread to voice->queue. */
|
147
|
+
entry->next = NULL;
|
148
|
+
*queue->tail = entry;
|
149
|
+
queue->tail = &entry->next;
|
150
|
+
if (queue->head != entry) {
|
151
|
+
/* stop the current thread if other threads run. */
|
152
|
+
entry->thread = rb_thread_current();
|
153
|
+
rb_thread_stop();
|
154
|
+
}
|
155
|
+
}
|
156
|
+
|
157
|
+
static void unlock_thread(thread_queue_t *queue)
|
158
|
+
{
|
159
|
+
/* dequeue the current thread from voice->queue. */
|
160
|
+
queue->head = queue->head->next;
|
161
|
+
if (queue->head == NULL) {
|
162
|
+
queue->tail = &queue->head;
|
163
|
+
} else {
|
164
|
+
/* resume the top of blocked threads. */
|
165
|
+
rb_thread_wakeup_alive(queue->head->thread);
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
static int add_data(voice_speech_data_t *vsd, const void *data, size_t size)
|
170
|
+
{
|
171
|
+
buffer_list_t *list;
|
172
|
+
size_t rest;
|
173
|
+
|
174
|
+
if (vsd->buffer_list == NULL) {
|
175
|
+
list = buffer_list_alloc(size);
|
176
|
+
if (list == NULL) {
|
177
|
+
vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
|
178
|
+
return -1;
|
179
|
+
}
|
180
|
+
vsd->buffer_list = vsd->buffer_list_last = list;
|
181
|
+
}
|
182
|
+
list = vsd->buffer_list_last;
|
183
|
+
rest = list->size - list->used;
|
184
|
+
if (size <= rest) {
|
185
|
+
memcpy(list->buf + list->used, data, size);
|
186
|
+
list->used += size;
|
187
|
+
} else {
|
188
|
+
memcpy(list->buf + list->used, data, rest);
|
189
|
+
list->used += rest;
|
190
|
+
data = (const char*)data + rest;
|
191
|
+
size -= rest;
|
192
|
+
list = buffer_list_alloc(size);
|
193
|
+
if (list == NULL) {
|
194
|
+
vsd->error = RBFLITE_ERROR_OUT_OF_MEMORY;
|
195
|
+
return -1;
|
196
|
+
}
|
197
|
+
memcpy(list->buf, data, size);
|
198
|
+
list->used = size;
|
199
|
+
vsd->buffer_list_last->next = list;
|
200
|
+
vsd->buffer_list_last = list;
|
201
|
+
}
|
202
|
+
return 0;
|
203
|
+
}
|
204
|
+
|
205
|
+
static buffer_list_t *buffer_list_alloc(size_t size)
|
206
|
+
{
|
207
|
+
size_t alloc_size = MAX(size + offsetof(buffer_list_t, buf), MIN_BUFFER_LIST_SIZE);
|
208
|
+
buffer_list_t *list = xmalloc(alloc_size);
|
209
|
+
|
210
|
+
if (list == NULL) {
|
211
|
+
return NULL;
|
212
|
+
}
|
213
|
+
list->next = NULL;
|
214
|
+
list->size = alloc_size - offsetof(buffer_list_t, buf);
|
215
|
+
list->used = 0;
|
216
|
+
return list;
|
217
|
+
}
|
218
|
+
|
219
|
+
static void check_error(voice_speech_data_t *vsd)
|
220
|
+
{
|
221
|
+
buffer_list_t *list, *list_next;
|
222
|
+
|
223
|
+
if (vsd->error == RBFLITE_ERROR_SUCCESS) {
|
224
|
+
return;
|
225
|
+
}
|
226
|
+
for (list = vsd->buffer_list; list != NULL; list = list_next) {
|
227
|
+
list_next = list->next;
|
228
|
+
xfree(list);
|
229
|
+
}
|
230
|
+
vsd->buffer_list = NULL;
|
231
|
+
switch (vsd->error) {
|
232
|
+
case RBFLITE_ERROR_OUT_OF_MEMORY:
|
233
|
+
rb_raise(rb_eNoMemError, "out of memory while writing speech data");
|
234
|
+
case RBFLITE_ERROR_LAME_INIT_PARAMS:
|
235
|
+
rb_raise(rb_eFliteRuntimeError, "lame_init_params() error");
|
236
|
+
case RBFLITE_ERROR_LAME_ENCODE_BUFFER:
|
237
|
+
rb_raise(rb_eFliteRuntimeError, "lame_encode_buffer() error");
|
238
|
+
case RBFLITE_ERROR_LAME_ENCODE_FLUSH:
|
239
|
+
rb_raise(rb_eFliteRuntimeError, "lame_encode_flush() error");
|
240
|
+
default:
|
241
|
+
rb_raise(rb_eFliteRuntimeError, "Unkown error %d", vsd->error);
|
242
|
+
}
|
243
|
+
}
|
89
244
|
|
90
245
|
/*
|
91
|
-
*
|
246
|
+
* Returns builtin voice names.
|
92
247
|
*
|
93
|
-
*
|
94
|
-
*
|
95
|
-
*
|
248
|
+
* @example
|
249
|
+
* Flite.list_builtin_voices # => ["kal", "awb_time", "kal16", "awb", "rms", "slt"]
|
250
|
+
*
|
251
|
+
* @return [Array]
|
96
252
|
*/
|
97
|
-
static int rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg);
|
98
|
-
static void *rbfile_io_write_protect(void *data);
|
99
|
-
static VALUE rbfile_io_write(VALUE data);
|
100
|
-
|
101
253
|
static VALUE
|
102
254
|
flite_s_list_builtin_voices(VALUE klass)
|
103
255
|
{
|
@@ -112,6 +264,53 @@ flite_s_list_builtin_voices(VALUE klass)
|
|
112
264
|
return ary;
|
113
265
|
}
|
114
266
|
|
267
|
+
/*
|
268
|
+
* Returns supported audio types used as the second argument of {Flite::Voice#to_speech}.
|
269
|
+
*
|
270
|
+
* @example
|
271
|
+
* # Compiled with mp3 support
|
272
|
+
* Flite.supported_audio_types # => [:wav, :raw, :mp3]
|
273
|
+
*
|
274
|
+
* # Compiled without mp3 support
|
275
|
+
* Flite.supported_audio_types # => [:wav, :raw]
|
276
|
+
*
|
277
|
+
* @return [Array]
|
278
|
+
*/
|
279
|
+
static VALUE
|
280
|
+
flite_s_supported_audio_types(VALUE klass)
|
281
|
+
{
|
282
|
+
VALUE ary = rb_ary_new();
|
283
|
+
|
284
|
+
rb_ary_push(ary, sym_wav);
|
285
|
+
rb_ary_push(ary, sym_raw);
|
286
|
+
#ifdef HAVE_MP3LAME
|
287
|
+
rb_ary_push(ary, sym_mp3);
|
288
|
+
#endif
|
289
|
+
return ary;
|
290
|
+
}
|
291
|
+
|
292
|
+
/*
|
293
|
+
* @overload sleep_time_after_speaking=(sec)
|
294
|
+
*
|
295
|
+
* Sets sleep time after {Flite::Voice#speak}.
|
296
|
+
* The default value is 0 on Unix and 0.3 on Windows.
|
297
|
+
*
|
298
|
+
* This is a workaround for voice cutoff on Windows.
|
299
|
+
* The following code speaks "Hello Wor.. Hello World" without
|
300
|
+
* 0.3 seconds sleep.
|
301
|
+
*
|
302
|
+
* "Hello World".speak # The last 0.3 seconds are cut off by the next speech on Windows.
|
303
|
+
* "Hello World".speak
|
304
|
+
*
|
305
|
+
* @param [Float] sec seconds to sleep
|
306
|
+
*/
|
307
|
+
static VALUE
|
308
|
+
flite_s_set_sleep_time_after_speaking(VALUE klass, VALUE val)
|
309
|
+
{
|
310
|
+
sleep_time_after_speaking = rb_time_interval(val);
|
311
|
+
return val;
|
312
|
+
}
|
313
|
+
|
115
314
|
static void
|
116
315
|
rbfile_voice_free(rbflite_voice_t *voice)
|
117
316
|
{
|
@@ -125,8 +324,10 @@ static VALUE
|
|
125
324
|
rbflite_voice_s_allocate(VALUE klass)
|
126
325
|
{
|
127
326
|
rbflite_voice_t *voice;
|
327
|
+
VALUE obj = Data_Make_Struct(klass, rbflite_voice_t, NULL, rbfile_voice_free, voice);
|
128
328
|
|
129
|
-
|
329
|
+
voice->queue.tail = &voice->queue.head;
|
330
|
+
return obj;
|
130
331
|
}
|
131
332
|
|
132
333
|
#ifdef HAVE_FLITE_VOICE_LOAD
|
@@ -137,6 +338,28 @@ rbflite_voice_load(void *data)
|
|
137
338
|
}
|
138
339
|
#endif
|
139
340
|
|
341
|
+
/*
|
342
|
+
* @overload initialize(name = nil)
|
343
|
+
*
|
344
|
+
* Create a new voice specified by <code>name</code>.
|
345
|
+
* If <code>name</code> includes '.' or '/' and ruby flite
|
346
|
+
* is compiled for CMU Flite 2.0.0 or later, it tries to
|
347
|
+
* use a loadable voice.
|
348
|
+
*
|
349
|
+
* @example
|
350
|
+
*
|
351
|
+
* # Use default voice. It is 'kal' usually.
|
352
|
+
* voice = Flite::Voice.new
|
353
|
+
*
|
354
|
+
* # Use a builtin voice.
|
355
|
+
* voice = Flite::Voice.new('awb')
|
356
|
+
*
|
357
|
+
* # Use a loadable voice.
|
358
|
+
* voice = Flite::Voice.new('/path/to/cmu_us_gka.flitevox')
|
359
|
+
*
|
360
|
+
* @param [String] name
|
361
|
+
* @see Flite.list_builtin_voices
|
362
|
+
*/
|
140
363
|
static VALUE
|
141
364
|
rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
|
142
365
|
{
|
@@ -174,18 +397,15 @@ rbflite_voice_initialize(int argc, VALUE *argv, VALUE self)
|
|
174
397
|
static void *
|
175
398
|
voice_speech_without_gvl(void *data)
|
176
399
|
{
|
177
|
-
|
178
|
-
flite_text_to_speech(
|
400
|
+
voice_speech_data_t *vsd = (voice_speech_data_t *)data;
|
401
|
+
flite_text_to_speech(vsd->text, vsd->voice, vsd->outtype);
|
179
402
|
return NULL;
|
180
403
|
}
|
181
404
|
|
182
405
|
static int
|
183
|
-
|
406
|
+
wav_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
|
184
407
|
{
|
185
|
-
|
186
|
-
io_write_arg_t arg;
|
187
|
-
|
188
|
-
arg.io = ud->io;
|
408
|
+
voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
|
189
409
|
|
190
410
|
if (start == 0) {
|
191
411
|
/* write WAVE file header. */
|
@@ -227,103 +447,363 @@ rbflite_audio_write_cb(const cst_wave *w, int start, int size, int last, asc_las
|
|
227
447
|
header.bitswidth = TO_LE2(sizeof(short) * 8);
|
228
448
|
header.data_size = TO_LE4(data_size);
|
229
449
|
|
230
|
-
|
231
|
-
arg.size = sizeof(header);
|
232
|
-
ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
|
233
|
-
if (ud->state != 0) {
|
450
|
+
if (add_data(vsd, &header, sizeof(header)) != 0) {
|
234
451
|
return CST_AUDIO_STREAM_STOP;
|
235
452
|
}
|
236
453
|
}
|
237
454
|
|
238
|
-
|
239
|
-
arg.size = size * sizeof(short);
|
240
|
-
ud->state = (int)(VALUE)rb_thread_call_with_gvl(rbfile_io_write_protect, &arg);
|
241
|
-
if (ud->state != 0) {
|
455
|
+
if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) {
|
242
456
|
return CST_AUDIO_STREAM_STOP;
|
243
457
|
}
|
458
|
+
return CST_AUDIO_STREAM_CONT;
|
459
|
+
}
|
460
|
+
|
461
|
+
static audio_stream_encoder_t wav_encoder = {
|
462
|
+
wav_encoder_cb,
|
463
|
+
NULL,
|
464
|
+
NULL,
|
465
|
+
};
|
244
466
|
|
467
|
+
static int
|
468
|
+
raw_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
|
469
|
+
{
|
470
|
+
voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
|
471
|
+
|
472
|
+
if (add_data(vsd, &w->samples[start], size * sizeof(short)) != 0) {
|
473
|
+
return CST_AUDIO_STREAM_STOP;
|
474
|
+
}
|
245
475
|
return CST_AUDIO_STREAM_CONT;
|
246
476
|
}
|
247
477
|
|
248
|
-
static
|
249
|
-
|
478
|
+
static audio_stream_encoder_t raw_encoder = {
|
479
|
+
raw_encoder_cb,
|
480
|
+
NULL,
|
481
|
+
NULL,
|
482
|
+
};
|
483
|
+
|
484
|
+
#ifdef HAVE_MP3LAME
|
485
|
+
|
486
|
+
#define MAX_SAMPLE_SIZE 1024 /* upper bound on the number of samples passed to one lame_encode_buffer() call */
|
487
|
+
/* "mp3buf_size in bytes = 1.25*num_samples + 7200" according to lame.h. */
|
488
|
+
#define MP3BUF_SIZE (MAX_SAMPLE_SIZE + MAX_SAMPLE_SIZE / 4 + 7200)
|
489
|
+
static int mp3_encoder_cb(const cst_wave *w, int start, int size, int last, asc_last_arg_t last_arg)
|
490
|
+
{
|
491
|
+
voice_speech_data_t *vsd = (voice_speech_data_t *)ASC_LAST_ARG_TO_USERDATA(last_arg);
|
492
|
+
lame_global_flags *gf = vsd->encoder;
|
493
|
+
unsigned char mp3buf[MP3BUF_SIZE];
|
494
|
+
short *sptr = &w->samples[start];
|
495
|
+
short *eptr = sptr + size;
|
496
|
+
int rv;
|
497
|
+
|
498
|
+
if (start == 0) {
|
499
|
+
lame_set_num_samples(gf, cst_wave_num_samples(w));
|
500
|
+
lame_set_in_samplerate(gf, cst_wave_sample_rate(w));
|
501
|
+
lame_set_num_channels(gf, 1);
|
502
|
+
lame_set_mode(gf, MONO);
|
503
|
+
rv = lame_init_params(gf);
|
504
|
+
if (rv == -1) {
|
505
|
+
vsd->error = RBFLITE_ERROR_LAME_INIT_PARAMS;
|
506
|
+
return CST_AUDIO_STREAM_STOP;
|
507
|
+
}
|
508
|
+
}
|
509
|
+
while (eptr - sptr > MAX_SAMPLE_SIZE) {
|
510
|
+
rv = lame_encode_buffer(gf, sptr, NULL, MAX_SAMPLE_SIZE, mp3buf, sizeof(mp3buf));
|
511
|
+
if (rv < 0) {
|
512
|
+
vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
|
513
|
+
return CST_AUDIO_STREAM_STOP;
|
514
|
+
}
|
515
|
+
if (rv > 0) {
|
516
|
+
if (add_data(vsd, mp3buf, rv) != 0) {
|
517
|
+
return CST_AUDIO_STREAM_STOP;
|
518
|
+
}
|
519
|
+
}
|
520
|
+
sptr += MAX_SAMPLE_SIZE;
|
521
|
+
}
|
522
|
+
rv = lame_encode_buffer(gf, sptr, NULL, eptr - sptr, mp3buf, sizeof(mp3buf));
|
523
|
+
if (rv < 0) {
|
524
|
+
vsd->error = RBFLITE_ERROR_LAME_ENCODE_BUFFER;
|
525
|
+
return CST_AUDIO_STREAM_STOP;
|
526
|
+
}
|
527
|
+
if (rv > 0) {
|
528
|
+
if (add_data(vsd, mp3buf, rv) != 0) {
|
529
|
+
return CST_AUDIO_STREAM_STOP;
|
530
|
+
}
|
531
|
+
}
|
532
|
+
if (last) {
|
533
|
+
rv = lame_encode_flush(gf, mp3buf, sizeof(mp3buf));
|
534
|
+
if (rv < 0) {
|
535
|
+
vsd->error = RBFLITE_ERROR_LAME_ENCODE_FLUSH;
|
536
|
+
return CST_AUDIO_STREAM_STOP;
|
537
|
+
}
|
538
|
+
if (rv > 0) {
|
539
|
+
if (add_data(vsd, mp3buf, rv) != 0) {
|
540
|
+
return CST_AUDIO_STREAM_STOP;
|
541
|
+
}
|
542
|
+
}
|
543
|
+
}
|
544
|
+
return CST_AUDIO_STREAM_CONT;
|
545
|
+
}
|
546
|
+
|
547
|
+
static void *mp3_encoder_init(VALUE opts)
|
250
548
|
{
|
251
|
-
|
252
|
-
|
253
|
-
|
549
|
+
lame_global_flags *gf = lame_init();
|
550
|
+
|
551
|
+
if (gf == NULL) {
|
552
|
+
rb_raise(rb_eFliteRuntimeError, "Failed to initialize lame");
|
553
|
+
}
|
554
|
+
|
555
|
+
lame_set_bWriteVbrTag(gf, 0);
|
556
|
+
lame_set_brate(gf, 64);
|
557
|
+
|
558
|
+
if (!NIL_P(opts)) {
|
559
|
+
VALUE v;
|
560
|
+
Check_Type(opts, T_HASH);
|
561
|
+
|
562
|
+
v = rb_hash_aref(opts, ID2SYM(rb_intern("bitrate")));
|
563
|
+
if (!NIL_P(v)) {
|
564
|
+
lame_set_brate(gf, NUM2INT(v));
|
565
|
+
}
|
566
|
+
|
567
|
+
v = rb_hash_aref(opts, ID2SYM(rb_intern("scale")));
|
568
|
+
if (!NIL_P(v)) {
|
569
|
+
lame_set_scale(gf, NUM2INT(v));
|
570
|
+
}
|
571
|
+
|
572
|
+
v = rb_hash_aref(opts, ID2SYM(rb_intern("quality")));
|
573
|
+
if (!NIL_P(v)) {
|
574
|
+
lame_set_quality(gf, NUM2INT(v));
|
575
|
+
}
|
576
|
+
}
|
577
|
+
|
578
|
+
lame_set_bWriteVbrTag(gf, 0);
|
579
|
+
return gf;
|
580
|
+
}
|
581
|
+
|
582
|
+
static void mp3_encoder_fini(void *encoder)
|
583
|
+
{
|
584
|
+
lame_close(encoder);
|
254
585
|
}
|
255
586
|
|
587
|
+
static audio_stream_encoder_t mp3_encoder = {
|
588
|
+
mp3_encoder_cb,
|
589
|
+
mp3_encoder_init,
|
590
|
+
mp3_encoder_fini,
|
591
|
+
};
|
592
|
+
|
593
|
+
#endif
|
594
|
+
|
595
|
+
/*
|
596
|
+
* @overload speak(text)
|
597
|
+
*
|
598
|
+
* Speak the <code>text</code>.
|
599
|
+
*
|
600
|
+
* @example
|
601
|
+
* voice = Flite::Voice.new
|
602
|
+
*
|
603
|
+
* # Speak 'Hello Flite World!'
|
604
|
+
* voice.speak('Hello Flite World!')
|
605
|
+
*
|
606
|
+
* @param [String] text
|
607
|
+
*/
|
256
608
|
static VALUE
|
257
|
-
|
609
|
+
rbflite_voice_speak(VALUE self, VALUE text)
|
258
610
|
{
|
259
|
-
|
260
|
-
|
261
|
-
|
611
|
+
rbflite_voice_t *voice = DATA_PTR(self);
|
612
|
+
voice_speech_data_t vsd;
|
613
|
+
thread_queue_entry_t entry;
|
614
|
+
|
615
|
+
if (voice->voice == NULL) {
|
616
|
+
rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
|
617
|
+
}
|
618
|
+
|
619
|
+
vsd.voice = voice->voice;
|
620
|
+
vsd.text = StringValueCStr(text);
|
621
|
+
vsd.outtype = "play";
|
622
|
+
vsd.buffer_list = NULL;
|
623
|
+
vsd.buffer_list_last = NULL;
|
624
|
+
vsd.error = RBFLITE_ERROR_SUCCESS;
|
625
|
+
|
626
|
+
lock_thread(&voice->queue, &entry);
|
627
|
+
|
628
|
+
rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
|
629
|
+
RB_GC_GUARD(text);
|
630
|
+
|
631
|
+
unlock_thread(&voice->queue);
|
632
|
+
|
633
|
+
check_error(&vsd);
|
634
|
+
|
635
|
+
if (sleep_time_after_speaking.tv_sec != 0 || sleep_time_after_speaking.tv_usec != 0) {
|
636
|
+
rb_thread_wait_for(sleep_time_after_speaking);
|
637
|
+
}
|
638
|
+
|
639
|
+
return self;
|
262
640
|
}
|
263
641
|
|
642
|
+
/*
|
643
|
+
* @overload to_speech(text, audio_type = :wav, opts = {})
|
644
|
+
*
|
645
|
+
* Converts <code>text</code> to audio data.
|
646
|
+
*
|
647
|
+
* @example
|
648
|
+
* voice = Flite::Voice.new
|
649
|
+
*
|
650
|
+
* # Save speech as wav
|
651
|
+
* File.binwrite('hello_flite_world.wav',
|
652
|
+
* voice.to_speech('Hello Flite World!'))
|
653
|
+
*
|
654
|
+
* # Save speech as raw pcm (signed 16 bit little endian, rate 8000 Hz, mono)
|
655
|
+
* File.binwrite('hello_flite_world.raw',
|
656
|
+
* voice.to_speech('Hello Flite World!', :raw))
|
657
|
+
*
|
658
|
+
* # Save speech as mp3
|
659
|
+
* File.binwrite('hello_flite_world.mp3',
|
660
|
+
* voice.to_speech('Hello Flite World!', :mp3))
|
661
|
+
*
|
662
|
+
* # Save speech as mp3 whose bitrate is 128k.
|
663
|
+
* File.binwrite('hello_flite_world.mp3',
|
664
|
+
* voice.to_speech('Hello Flite World!', :mp3, :bitrate => 128))
|
665
|
+
*
|
666
|
+
* @param [String] text
|
667
|
+
* @param [Symbol] audio_type :wav, :raw or :mp3 (when mp3 support is enabled)
|
668
|
+
* @param [Hash] opts audio encoder options
|
669
|
+
* @return [String] audio data
|
670
|
+
* @see Flite.supported_audio_types
|
671
|
+
*/
|
264
672
|
static VALUE
|
265
|
-
|
673
|
+
rbflite_voice_to_speech(int argc, VALUE *argv, VALUE self)
|
266
674
|
{
|
267
675
|
rbflite_voice_t *voice = DATA_PTR(self);
|
268
676
|
VALUE text;
|
269
|
-
VALUE
|
677
|
+
VALUE audio_type;
|
678
|
+
VALUE opts;
|
270
679
|
cst_audio_streaming_info *asi = NULL;
|
271
|
-
|
680
|
+
audio_stream_encoder_t *encoder;
|
681
|
+
voice_speech_data_t vsd;
|
682
|
+
thread_queue_entry_t entry;
|
683
|
+
buffer_list_t *list, *list_next;
|
684
|
+
size_t size;
|
685
|
+
VALUE speech_data;
|
686
|
+
char *ptr;
|
272
687
|
|
273
688
|
if (voice->voice == NULL) {
|
274
|
-
rb_raise(
|
275
|
-
}
|
276
|
-
|
277
|
-
rb_scan_args(argc, argv, "
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
arg.state = 0;
|
282
|
-
|
283
|
-
if (NIL_P(out)) {
|
284
|
-
/* play audio */
|
285
|
-
arg.outtype = "play";
|
286
|
-
} else if (rb_respond_to(out, id_write)) {
|
287
|
-
/* write to an object */
|
288
|
-
asi = new_audio_streaming_info();
|
289
|
-
if (asi == NULL) {
|
290
|
-
rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
|
291
|
-
}
|
292
|
-
asi->asc = rbflite_audio_write_cb;
|
293
|
-
asi->userdata = (void*)&arg;
|
294
|
-
feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
|
295
|
-
arg.outtype = "stream";
|
296
|
-
arg.io = out;
|
689
|
+
rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
|
690
|
+
}
|
691
|
+
|
692
|
+
rb_scan_args(argc, argv, "12", &text, &audio_type, &opts);
|
693
|
+
|
694
|
+
if (NIL_P(audio_type)) {
|
695
|
+
encoder = &wav_encoder;
|
297
696
|
} else {
|
298
|
-
|
299
|
-
|
300
|
-
|
697
|
+
if (rb_equal(audio_type, sym_wav)) {
|
698
|
+
encoder = &wav_encoder;
|
699
|
+
} else if (rb_equal(audio_type, sym_raw)) {
|
700
|
+
encoder = &raw_encoder;
|
701
|
+
#ifdef HAVE_MP3LAME
|
702
|
+
} else if (rb_equal(audio_type, sym_mp3)) {
|
703
|
+
encoder = &mp3_encoder;
|
704
|
+
#endif
|
705
|
+
} else {
|
706
|
+
rb_raise(rb_eArgError, "unknown audio type");
|
707
|
+
}
|
301
708
|
}
|
302
709
|
|
303
|
-
|
304
|
-
|
305
|
-
|
710
|
+
vsd.voice = voice->voice;
|
711
|
+
vsd.text = StringValueCStr(text);
|
712
|
+
vsd.outtype = "stream";
|
713
|
+
vsd.encoder = NULL;
|
714
|
+
vsd.buffer_list = NULL;
|
715
|
+
vsd.buffer_list_last = NULL;
|
716
|
+
vsd.error = RBFLITE_ERROR_SUCCESS;
|
306
717
|
|
307
|
-
if (
|
308
|
-
|
309
|
-
|
310
|
-
|
718
|
+
if (encoder->encoder_init) {
|
719
|
+
vsd.encoder = encoder->encoder_init(opts);
|
720
|
+
}
|
721
|
+
|
722
|
+
/* collect the encoder output in memory through the streaming callback */
|
723
|
+
asi = new_audio_streaming_info();
|
724
|
+
if (asi == NULL) {
|
725
|
+
if (encoder->encoder_fini) {
|
726
|
+
encoder->encoder_fini(vsd.encoder);
|
311
727
|
}
|
728
|
+
rb_raise(rb_eNoMemError, "failed to allocate audio_streaming_info");
|
312
729
|
}
|
313
|
-
|
730
|
+
asi->asc = encoder->asc;
|
731
|
+
asi->userdata = (void*)&vsd;
|
732
|
+
|
733
|
+
lock_thread(&voice->queue, &entry);
|
734
|
+
|
735
|
+
flite_feat_set(voice->voice->features, "streaming_info", audio_streaming_info_val(asi));
|
736
|
+
rb_thread_call_without_gvl(voice_speech_without_gvl, &vsd, NULL, NULL);
|
737
|
+
flite_feat_remove(voice->voice->features, "streaming_info");
|
738
|
+
RB_GC_GUARD(text);
|
739
|
+
|
740
|
+
unlock_thread(&voice->queue);
|
741
|
+
|
742
|
+
if (encoder->encoder_fini) {
|
743
|
+
encoder->encoder_fini(vsd.encoder);
|
744
|
+
}
|
745
|
+
|
746
|
+
check_error(&vsd);
|
747
|
+
|
748
|
+
size = 0;
|
749
|
+
for (list = vsd.buffer_list; list != NULL; list = list->next) {
|
750
|
+
size += list->used;
|
751
|
+
}
|
752
|
+
speech_data = rb_str_buf_new(size);
|
753
|
+
ptr = RSTRING_PTR(speech_data);
|
754
|
+
for (list = vsd.buffer_list; list != NULL; list = list_next) {
|
755
|
+
memcpy(ptr, list->buf, list->used);
|
756
|
+
ptr += list->used;
|
757
|
+
list_next = list->next;
|
758
|
+
xfree(list);
|
759
|
+
}
|
760
|
+
rb_str_set_len(speech_data, size);
|
761
|
+
|
762
|
+
return speech_data;
|
314
763
|
}
|
315
764
|
|
765
|
+
/*
|
766
|
+
* @overload name
|
767
|
+
*
|
768
|
+
* Returns voice name.
|
769
|
+
*
|
770
|
+
* @example
|
771
|
+
* voice = Flite::Voice.new('slt')
|
772
|
+
* voice.name # => 'slt'
|
773
|
+
*
|
774
|
+
* # voice loading is a new feature of CMU Flite 2.0.0.
|
775
|
+
* voice = Flite::Voice.new('/path/to/cmu_us_fem.flitevox')
|
776
|
+
* voice.name # => 'cmu_us_fem'
|
777
|
+
*
|
778
|
+
* @return [String]
|
779
|
+
*/
|
316
780
|
static VALUE
|
317
781
|
rbflite_voice_name(VALUE self)
|
318
782
|
{
|
319
783
|
rbflite_voice_t *voice = DATA_PTR(self);
|
320
784
|
|
321
785
|
if (voice->voice == NULL) {
|
322
|
-
rb_raise(
|
786
|
+
rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
|
323
787
|
}
|
324
788
|
return rb_usascii_str_new_cstr(voice->voice->name);
|
325
789
|
}
|
326
790
|
|
791
|
+
/*
|
792
|
+
* @overload pathname
|
793
|
+
*
|
794
|
+
* Returns the path of the voice if the voice is a loadable voice.
|
795
|
+
* Otherwise, nil.
|
796
|
+
*
|
797
|
+
* @example
|
798
|
+
* voice = Flite::Voice.new
|
799
|
+
* voice.pathname # => nil
|
800
|
+
*
|
801
|
+
* # voice loading is a new feature of CMU Flite 2.0.0.
|
802
|
+
* voice = Flite::Voice.new('/path/to/cmu_us_aup.flitevox')
|
803
|
+
* voice.pathname # => '/path/to/cmu_us_aup.flitevox'
|
804
|
+
*
|
805
|
+
* @return [String, nil]
|
806
|
+
*/
|
327
807
|
static VALUE
|
328
808
|
rbflite_voice_pathname(VALUE self)
|
329
809
|
{
|
@@ -331,23 +811,60 @@ rbflite_voice_pathname(VALUE self)
|
|
331
811
|
const char *pathname;
|
332
812
|
|
333
813
|
if (voice->voice == NULL) {
|
334
|
-
rb_raise(
|
814
|
+
rb_raise(rb_eFliteRuntimeError, "%s is not initialized", rb_obj_classname(self));
|
335
815
|
}
|
336
|
-
pathname =
|
816
|
+
pathname = flite_get_param_string(voice->voice->features, "pathname", "");
|
337
817
|
if (pathname[0] == '\0') {
|
338
818
|
return Qnil;
|
339
819
|
}
|
340
820
|
return rb_usascii_str_new_cstr(pathname);
|
341
821
|
}
|
342
822
|
|
823
|
+
/*
|
824
|
+
* @overload inspect
|
825
|
+
*
|
826
|
+
* Returns the value as a string for inspection.
|
827
|
+
*
|
828
|
+
* @return [String]
|
829
|
+
* @private
|
830
|
+
*/
|
831
|
+
static VALUE
|
832
|
+
rbflite_voice_inspect(VALUE self)
|
833
|
+
{
|
834
|
+
rbflite_voice_t *voice = DATA_PTR(self);
|
835
|
+
const char *class_name = rb_obj_classname(self);
|
836
|
+
const char *voice_name;
|
837
|
+
const char *pathname;
|
838
|
+
|
839
|
+
if (voice->voice == NULL) {
|
840
|
+
return rb_sprintf("#<%s: not initialized>", class_name);
|
841
|
+
}
|
842
|
+
voice_name = voice->voice->name;
|
843
|
+
|
844
|
+
pathname = flite_get_param_string(voice->voice->features, "pathname", "");
|
845
|
+
if (pathname[0] == '\0') {
|
846
|
+
return rb_sprintf("#<%s: %s>", class_name, voice_name);
|
847
|
+
} else {
|
848
|
+
return rb_sprintf("#<%s: %s (%s)>", class_name, voice_name, pathname);
|
849
|
+
}
|
850
|
+
}
|
851
|
+
|
852
|
+
#ifdef _WIN32
|
853
|
+
__declspec(dllexport) void Init_flite(void);
|
854
|
+
#endif
|
855
|
+
|
343
856
|
void
|
344
857
|
Init_flite(void)
|
345
858
|
{
|
346
859
|
VALUE cmu_flite_version;
|
347
860
|
|
348
|
-
|
861
|
+
sym_mp3 = ID2SYM(rb_intern("mp3"));
|
862
|
+
sym_raw = ID2SYM(rb_intern("raw"));
|
863
|
+
sym_wav = ID2SYM(rb_intern("wav"));
|
349
864
|
|
350
865
|
rb_mFlite = rb_define_module("Flite");
|
866
|
+
rb_eFliteError = rb_define_class_under(rb_mFlite, "Error", rb_eStandardError);
|
867
|
+
rb_eFliteRuntimeError = rb_define_class_under(rb_mFlite, "Runtime", rb_eFliteError);
|
351
868
|
|
352
869
|
cmu_flite_version = rb_usascii_str_new_cstr(FLITE_PROJECT_VERSION);
|
353
870
|
OBJ_FREEZE(cmu_flite_version);
|
@@ -367,11 +884,15 @@ Init_flite(void)
|
|
367
884
|
#endif
|
368
885
|
|
369
886
|
rb_define_singleton_method(rb_mFlite, "list_builtin_voices", flite_s_list_builtin_voices, 0);
|
887
|
+
rb_define_singleton_method(rb_mFlite, "supported_audio_types", flite_s_supported_audio_types, 0);
|
888
|
+
rb_define_singleton_method(rb_mFlite, "sleep_time_after_speaking=", flite_s_set_sleep_time_after_speaking, 1);
|
370
889
|
rb_cVoice = rb_define_class_under(rb_mFlite, "Voice", rb_cObject);
|
371
890
|
rb_define_alloc_func(rb_cVoice, rbflite_voice_s_allocate);
|
372
891
|
|
373
892
|
rb_define_method(rb_cVoice, "initialize", rbflite_voice_initialize, -1);
|
374
|
-
rb_define_method(rb_cVoice, "
|
893
|
+
rb_define_method(rb_cVoice, "speak", rbflite_voice_speak, 1);
|
894
|
+
rb_define_method(rb_cVoice, "to_speech", rbflite_voice_to_speech, -1);
|
375
895
|
rb_define_method(rb_cVoice, "name", rbflite_voice_name, 0);
|
376
896
|
rb_define_method(rb_cVoice, "pathname", rbflite_voice_pathname, 0);
|
897
|
+
rb_define_method(rb_cVoice, "inspect", rbflite_voice_inspect, 0);
|
377
898
|
}
|