voicevox.rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,448 @@
1
+ # frozen_string_literal: true
2
+
3
require "ffi"
require "English"
require "json"
5
+
6
class Voicevox
  #
  # Thin FFI wrapper around the voicevox_core shared library.
  #
  # The module body first binds the current C API from the `voicevox_core`
  # library. If a LoadError is raised while doing so, the `rescue` clause
  # binds the legacy `core` library in {Core::Old} instead and defines
  # module functions that expose the legacy entry points under the names of
  # the current API, so callers can use a single interface.
  #
  module Core
    extend FFI::Library

    # Result codes returned by the core functions.
    enum :voicevox_result_code,
         {
           voicevox_result_succeed: 0,
           voicevox_result_not_loaded_openjtalk_dict: 1,
           voicevox_result_failed_load_model: 2,
           voicevox_result_failed_get_supported_devices: 3,
           voicevox_result_cant_gpu_support: 4,
           voicevox_result_failed_load_metas: 5,
           voicevox_result_uninitialized_status: 6,
           voicevox_result_invalid_speaker_id: 7,
           voicevox_result_invalid_model_index: 8,
           voicevox_result_inference_failed: 9,
           voicevox_result_failed_extract_full_context_label: 10,
           voicevox_result_invalid_utf8_input: 11,
           voicevox_result_failed_parse_kana: 12,
           voicevox_result_invalid_audio_query: 13
         }.to_a.flatten

    # Hardware acceleration modes accepted by the initialize options.
    enum :voicevox_acceleration_mode,
         {
           voicevox_acceleration_mode_auto: 0,
           voicevox_acceleration_mode_cpu: 1,
           voicevox_acceleration_mode_gpu: 2
         }.to_a.flatten

    # Options struct passed by value to `voicevox_initialize`.
    class VoicevoxInitializeOptions < FFI::Struct
      layout(
        *{
          acceleration_mode: :voicevox_acceleration_mode,
          cpu_num_threads: :int16,
          load_all_models: :bool,
          openjtalk_dict_path: :pointer
        }.to_a.flatten
      )
    end

    # Options struct passed by value to `voicevox_audio_query`.
    class VoicevoxAudioQueryOptions < FFI::Struct
      layout :kana, :bool
    end

    # Options struct passed by value to `voicevox_synthesis`.
    class VoicevoxSynthesisOptions < FFI::Struct
      layout :enable_interrogative_upspeak, :bool
    end

    # Options struct passed by value to `voicevox_tts`.
    class VoicevoxTtsOptions < FFI::Struct
      layout :kana, :bool, :enable_interrogative_upspeak, :bool
    end

    # The structs above are defined before the library is loaded so that they
    # remain available to the legacy fallback in the `rescue` clause below.
    ffi_lib %w[voicevox_core.dll libvoicevox_core.dylib libvoicevox_core.so]

    attach_function :voicevox_make_default_initialize_options,
                    [],
                    VoicevoxInitializeOptions.by_value

    attach_function :voicevox_initialize,
                    [VoicevoxInitializeOptions.by_value],
                    :voicevox_result_code

    attach_function :voicevox_load_model, [:int64], :voicevox_result_code

    attach_function :voicevox_is_gpu_mode, [], :bool

    attach_function :voicevox_is_model_loaded, [:int64], :bool

    attach_function :voicevox_finalize, [], :void

    attach_function :voicevox_get_metas_json, [], :string

    attach_function :voicevox_get_supported_devices_json, [], :string

    attach_function :voicevox_get_version, [], :string

    attach_function :voicevox_predict_duration,
                    %i[int64 pointer int32 pointer],
                    :voicevox_result_code

    attach_function :voicevox_predict_intonation,
                    %i[
                      int64
                      pointer
                      pointer
                      pointer
                      pointer
                      pointer
                      pointer
                      int32
                      pointer
                    ],
                    :voicevox_result_code

    attach_function :voicevox_decode,
                    %i[int64 int64 pointer pointer int32 pointer],
                    :voicevox_result_code

    attach_function :voicevox_make_default_audio_query_options,
                    [],
                    VoicevoxAudioQueryOptions.by_value

    attach_function :voicevox_audio_query,
                    [
                      :string,
                      :int32,
                      VoicevoxAudioQueryOptions.by_value,
                      :pointer
                    ],
                    :voicevox_result_code

    # attach_function :voicevox_make_default_synthesis_options,
    #   [],
    #   VoicevoxSynthesisOptions.by_value

    attach_function :voicevox_synthesis,
                    [
                      :string,
                      :int32,
                      VoicevoxSynthesisOptions.by_value,
                      :pointer,
                      :pointer
                    ],
                    :voicevox_result_code

    attach_function :voicevox_make_default_tts_options,
                    [],
                    VoicevoxTtsOptions.by_value

    attach_function :voicevox_tts,
                    [
                      :string,
                      :int64,
                      VoicevoxTtsOptions.by_value,
                      :pointer,
                      :pointer
                    ],
                    :voicevox_result_code

    attach_function :voicevox_audio_query_json_free, [:pointer], :void
    attach_function :voicevox_wav_free, [:pointer], :void

    attach_function :voicevox_error_result_to_message,
                    [:voicevox_result_code],
                    :string
  rescue LoadError
    # Binding the current core failed; fall back to the legacy `core` library.

    #
    # Raw bindings for the legacy core library.
    #
    module Old
      extend FFI::Library
      ffi_lib %w[core.dll libcore.dylib libcore.so]

      # Result codes of the legacy core (no `invalid_audio_query` entry).
      enum :voicevox_result_code,
           {
             voicevox_result_succeed: 0,
             voicevox_result_not_loaded_openjtalk_dict: 1,
             voicevox_result_failed_load_model: 2,
             voicevox_result_failed_get_supported_devices: 3,
             voicevox_result_cant_gpu_support: 4,
             voicevox_result_failed_load_metas: 5,
             voicevox_result_uninitialized_status: 6,
             voicevox_result_invalid_speaker_id: 7,
             voicevox_result_invalid_model_index: 8,
             voicevox_result_inference_failed: 9,
             voicevox_result_failed_extract_full_context_label: 10,
             voicevox_result_invalid_utf8_input: 11,
             voicevox_result_failed_parse_kana: 12
           }.to_a.flatten

      attach_function :initialize, %i[bool int bool], :bool

      attach_function :load_model, [:int64], :bool

      attach_function :is_model_loaded, [:int64], :bool

      attach_function :finalize, [], :void

      attach_function :metas, [], :string

      attach_function :last_error_message, [], :string

      attach_function :supported_devices, [], :string

      attach_function :yukarin_s_forward,
                      %i[int64 pointer pointer pointer],
                      :bool

      attach_function :yukarin_sa_forward,
                      %i[
                        int64
                        pointer
                        pointer
                        pointer
                        pointer
                        pointer
                        pointer
                        pointer
                        pointer
                      ],
                      :bool

      attach_function :decode_forward,
                      %i[int64 int64 pointer pointer pointer pointer],
                      :bool

      attach_function :voicevox_load_openjtalk_dict,
                      [:string],
                      :voicevox_result_code

      attach_function :voicevox_tts,
                      %i[string int64 pointer pointer],
                      :voicevox_result_code

      attach_function :voicevox_tts_from_kana,
                      %i[string int64 pointer pointer],
                      :voicevox_result_code

      attach_function :voicevox_wav_free, [:pointer], :void

      attach_function :voicevox_error_result_to_message,
                      [:voicevox_result_code],
                      :string
    rescue LoadError
      # Neither the current nor the legacy library could be bound.
      raise(
        LoadError,
        "Failed to load voicevox_core! " \
          "(voicevox_core.dll, libvoicevox_core.so, libvoicevox_core.dylib, " \
          "core.dll, libcore.so, libcore.dylib)\n" \
          "Make sure you have installed voicevox_core and its dependencies " \
          "(such as onnxruntime), and that the voicevox_core shared library " \
          "can be found in your library path."
      )
    end

    # Everything below emulates the current API on top of {Old}.
    module_function

    # Builds an options struct with auto acceleration, 0 CPU threads,
    # no eager model loading and no OpenJTalk dictionary path.
    #
    # @return [Voicevox::Core::VoicevoxInitializeOptions]
    def voicevox_make_default_initialize_options
      options = VoicevoxInitializeOptions.new
      options[:acceleration_mode] = :voicevox_acceleration_mode_auto
      options[:cpu_num_threads] = 0
      options[:load_all_models] = false
      options[:openjtalk_dict_path] = nil
      options
    end

    # Initializes the legacy core and, when a dictionary path is set, loads
    # the OpenJTalk dictionary.
    #
    # @param [Voicevox::Core::VoicevoxInitializeOptions] options
    # @return [Symbol] result code of the dictionary load, or
    #   `:voicevox_result_succeed` when no dictionary path was given.
    # @raise [ArgumentError] if the acceleration mode is not a known mode.
    # @raise [RuntimeError] with the core's last error message if
    #   initialization fails.
    def voicevox_initialize(options)
      gpu =
        case options[:acceleration_mode]
        when :voicevox_acceleration_mode_auto
          supported_devices = JSON.parse(Old.supported_devices)
          # Coerce to a strict boolean so a missing key never hands nil to
          # the :bool parameter of Old.initialize.
          supported_devices["cuda"] || supported_devices["dml"] ? true : false
        when :voicevox_acceleration_mode_gpu
          true
        when :voicevox_acceleration_mode_cpu
          false
        else
          raise ArgumentError,
                "unknown acceleration mode: " \
                  "#{options[:acceleration_mode].inspect}"
        end
      @is_gpu_mode = gpu

      initialized =
        Old.initialize(
          gpu,
          options[:cpu_num_threads],
          options[:load_all_models]
        )
      raise(Old.last_error_message) unless initialized

      # The default options leave the dictionary path unset (NULL pointer);
      # reading a string from it would fail, so skip the dictionary load.
      dict_path = options[:openjtalk_dict_path]
      return :voicevox_result_succeed if dict_path.null?

      Old.voicevox_load_openjtalk_dict(dict_path.read_string)
    end

    # Loads the model for the given speaker.
    #
    # @param [Integer] speaker_id
    # @return [Symbol] `:voicevox_result_succeed`
    # @raise [RuntimeError] with the core's last error message on failure.
    def voicevox_load_model(speaker_id)
      if Old.load_model(speaker_id)
        :voicevox_result_succeed
      else
        raise(Old.last_error_message)
      end
    end

    # @param [Integer] speaker_id
    # @return [Boolean]
    def voicevox_is_model_loaded(speaker_id)
      Old.is_model_loaded(speaker_id)
    end

    # Returns the GPU flag remembered by {voicevox_initialize}
    # (nil if it has not been called yet).
    #
    # @return [Boolean, nil]
    def voicevox_is_gpu_mode
      @is_gpu_mode
    end

    # @return [void]
    def voicevox_finalize
      Old.finalize
    end

    # @return [String]
    def voicevox_get_metas_json
      Old.metas
    end

    # @return [String]
    def voicevox_get_supported_devices_json
      Old.supported_devices
    end

    # Runs the legacy `yukarin_s_forward` entry point.
    #
    # @param [Integer] length
    # @param [FFI::Pointer<Integer>] phoneme_list
    # @param [Integer] speaker_id
    # @param [FFI::Pointer] output buffer filled by the core
    # @return [Symbol] `:voicevox_result_succeed`
    # @raise [RuntimeError] with the core's last error message on failure.
    def voicevox_predict_duration(length, phoneme_list, speaker_id, output)
      # The legacy API takes the speaker id through a pointer.
      speaker_id_ptr = FFI::MemoryPointer.new(:int64)
      speaker_id_ptr.put(:int64, 0, speaker_id)
      if Old.yukarin_s_forward(length, phoneme_list, speaker_id_ptr, output)
        :voicevox_result_succeed
      else
        raise(Old.last_error_message)
      end
    end

    # Runs the legacy `yukarin_sa_forward` entry point.
    #
    # @param [Integer] length
    # @param [FFI::Pointer<Integer>] vowel_phoneme_list
    # @param [FFI::Pointer<Integer>] consonant_phoneme_list
    # @param [FFI::Pointer<Integer>] start_accent_list
    # @param [FFI::Pointer<Integer>] end_accent_list
    # @param [FFI::Pointer<Integer>] start_accent_phrase_list
    # @param [FFI::Pointer<Integer>] end_accent_phrase_list
    # @param [Integer] speaker_id
    # @param [FFI::Pointer] output buffer filled by the core
    # @return [Symbol] `:voicevox_result_succeed`
    # @raise [RuntimeError] with the core's last error message on failure.
    def voicevox_predict_intonation(
      length,
      vowel_phoneme_list,
      consonant_phoneme_list,
      start_accent_list,
      end_accent_list,
      start_accent_phrase_list,
      end_accent_phrase_list,
      speaker_id,
      output
    )
      # The legacy API takes the speaker id through a pointer.
      speaker_id_ptr = FFI::MemoryPointer.new(:int64)
      speaker_id_ptr.put(:int64, 0, speaker_id)
      if Old.yukarin_sa_forward(
           length,
           vowel_phoneme_list,
           consonant_phoneme_list,
           start_accent_list,
           end_accent_list,
           start_accent_phrase_list,
           end_accent_phrase_list,
           speaker_id_ptr,
           output
         )
        :voicevox_result_succeed
      else
        raise(Old.last_error_message)
      end
    end

    # Runs the legacy `decode_forward` entry point.
    #
    # @param [Integer] length
    # @param [Integer] phoneme_size
    # @param [FFI::Pointer<Float>] f0
    # @param [FFI::Pointer<Float>] phoneme
    # @param [Integer] speaker_id
    # @param [FFI::Pointer] output buffer filled by the core
    # @return [Symbol] `:voicevox_result_succeed`
    # @raise [RuntimeError] with the core's last error message on failure.
    def voicevox_decode(length, phoneme_size, f0, phoneme, speaker_id, output)
      # Write the id with the same width as the buffer (:int64), matching
      # the other prediction wrappers; a 32-bit write fills only half of it.
      speaker_id_ptr = FFI::MemoryPointer.new(:int64)
      speaker_id_ptr.put(:int64, 0, speaker_id)
      if Old.decode_forward(
           length,
           phoneme_size,
           f0,
           phoneme,
           speaker_id_ptr,
           output
         )
        :voicevox_result_succeed
      else
        raise(Old.last_error_message)
      end
    end

    # Synthesizes speech, dispatching on `options[:kana]` to the legacy
    # kana or plain-text entry point. Other option fields are not used.
    #
    # @param [String] text
    # @param [Integer] speaker_id
    # @param [Voicevox::Core::VoicevoxTtsOptions] options
    # @param [FFI::Pointer<Integer>] output_binary_size
    # @param [FFI::Pointer<String>] output_wav
    # @return [Symbol] result code returned by the legacy core
    def voicevox_tts(text, speaker_id, options, output_binary_size, output_wav)
      if options[:kana]
        Old.voicevox_tts_from_kana(
          text,
          speaker_id,
          output_binary_size,
          output_wav
        )
      else
        Old.voicevox_tts(text, speaker_id, output_binary_size, output_wav)
      end
    end

    # @param [FFI::Pointer<String>] wav
    # @return [void]
    def voicevox_wav_free(wav)
      Old.voicevox_wav_free(wav)
    end

    # @param [Symbol] type result code
    # @return [String] message for the result code
    def voicevox_error_result_to_message(type)
      Old.voicevox_error_result_to_message(type)
    end

    # Builds a TTS options struct with `kana` disabled.
    #
    # @return [Voicevox::Core::VoicevoxTtsOptions]
    def voicevox_make_default_tts_options
      options = Voicevox::Core::VoicevoxTtsOptions.new
      options[:kana] = false
      options
    end

    # Tell the user that the legacy library is in use.
    warn(
      "Failed to load new core (voicevox_core.dll, libvoicevox_core.so, libvoicevox_core.dylib), " \
        "using old core (core.dll, libcore.so, libcore.dylib)."
    )
  end
end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "objspace"
4
+
5
class Voicevox
  #
  # Base class for voicevox.rb errors.
  #
  class Error < StandardError
  end

  #
  # Error reported by the Voicevox core.
  #
  # Every result code has a dedicated subclass that stores its code in a
  # class-level instance variable; {CoreError.from_code} maps a code back to
  # an instance of the matching subclass.
  #
  class CoreError < Error
    # @return [Symbol] result code this error was built for.
    attr_reader :code

    # Builds the error for the result code declared on the receiver's class,
    # using the message the core provides for that code.
    def initialize
      # Copy the code from the class; the instance-level reader is still nil
      # at this point, so reading it here would always store nil.
      @code = self.class.code
      # Work on a copy so a frozen or shared string returned by the core is
      # never mutated by force_encoding.
      message =
        Voicevox::Core
          .voicevox_error_result_to_message(@code)
          .dup
          .force_encoding("UTF-8")
      super(message)
    end

    class << self
      # @return [Symbol, nil] result code handled by this error class.
      attr_reader :code

      # Instantiates the subclass of the receiver registered for +code+.
      #
      # @param [Symbol] code result code returned by the core.
      # @return [Voicevox::CoreError]
      # @raise [ArgumentError] if no subclass declares +code+.
      def from_code(code)
        error_class =
          ObjectSpace
            .each_object(Class)
            .find { |klass| klass < self && klass.code == code }
        unless error_class
          raise ArgumentError, "unknown voicevox result code: #{code.inspect}"
        end

        error_class.new
      end
    end

    class NotLoadedOpenjtalkDict < Voicevox::CoreError
      @code = :voicevox_result_not_loaded_openjtalk_dict
    end

    class FailedLoadModel < Voicevox::CoreError
      @code = :voicevox_result_failed_load_model
    end

    class FailedGetSupportedDevices < Voicevox::CoreError
      @code = :voicevox_result_failed_get_supported_devices
    end

    class CantGpuSupport < Voicevox::CoreError
      @code = :voicevox_result_cant_gpu_support
    end

    class FailedLoadMetas < Voicevox::CoreError
      @code = :voicevox_result_failed_load_metas
    end

    class UninitializedStatus < Voicevox::CoreError
      @code = :voicevox_result_uninitialized_status
    end

    class InvalidSpeakerId < Voicevox::CoreError
      @code = :voicevox_result_invalid_speaker_id
    end

    class InvalidModelIndex < Voicevox::CoreError
      @code = :voicevox_result_invalid_model_index
    end

    class InferenceFailed < Voicevox::CoreError
      @code = :voicevox_result_inference_failed
    end

    class FailedExtractFullContextLabel < Voicevox::CoreError
      @code = :voicevox_result_failed_extract_full_context_label
    end

    class InvalidUtf8Input < Voicevox::CoreError
      @code = :voicevox_result_invalid_utf8_input
    end

    class FailedParseKana < Voicevox::CoreError
      @code = :voicevox_result_failed_parse_kana
    end

    class InvalidAudioQuery < Voicevox::CoreError
      @code = :voicevox_result_invalid_audio_query
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
class Voicevox
  # Version of voicevox.rb.
  #
  # @return [String]
  VERSION = "0.1.0"
end