ibm_watson 1.6.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +42 -4
- data/lib/ibm_watson/assistant_v1.rb +277 -81
- data/lib/ibm_watson/assistant_v2.rb +100 -22
- data/lib/ibm_watson/compare_comply_v1.rb +44 -23
- data/lib/ibm_watson/discovery_v1.rb +132 -14
- data/lib/ibm_watson/discovery_v2.rb +234 -18
- data/lib/ibm_watson/language_translator_v3.rb +59 -27
- data/lib/ibm_watson/natural_language_classifier_v1.rb +3 -2
- data/lib/ibm_watson/natural_language_understanding_v1.rb +705 -14
- data/lib/ibm_watson/personality_insights_v3.rb +29 -18
- data/lib/ibm_watson/speech_to_text_v1.rb +278 -121
- data/lib/ibm_watson/text_to_speech_v1.rb +689 -130
- data/lib/ibm_watson/tone_analyzer_v3.rb +11 -13
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +32 -16
- data/lib/ibm_watson/visual_recognition_v4.rb +67 -23
- data/test/integration/test_assistant_v1.rb +9 -0
- data/test/integration/test_assistant_v2.rb +9 -0
- data/test/integration/test_discovery_v2.rb +29 -0
- data/test/integration/test_natural_language_understanding_v1.rb +134 -1
- data/test/integration/test_text_to_speech_v1.rb +60 -3
- data/test/unit/test_assistant_v1.rb +52 -1
- data/test/unit/test_assistant_v2.rb +51 -0
- data/test/unit/test_discovery_v2.rb +30 -1
- data/test/unit/test_natural_language_understanding_v1.rb +231 -0
- data/test/unit/test_text_to_speech_v1.rb +152 -7
- metadata +12 -11
@@ -13,7 +13,9 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
|
18
|
+
#
|
17
19
|
# The IBM Watson™ Text to Speech service provides APIs that use IBM's
|
18
20
|
# speech-synthesis capabilities to synthesize text into natural-sounding speech in a
|
19
21
|
# variety of languages, dialects, and voices. The service supports at least one male or
|
@@ -31,8 +33,12 @@
|
|
31
33
|
# that, when combined, sound like the word. A phonetic translation is based on the SSML
|
32
34
|
# phoneme format for representing a word. You can specify a phonetic translation in
|
33
35
|
# standard International Phonetic Alphabet (IPA) representation or in the proprietary IBM
|
34
|
-
# Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch,
|
35
|
-
# support only IPA.
|
36
|
+
# Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch, Australian English,
|
37
|
+
# and Korean languages support only IPA.
|
38
|
+
#
|
39
|
+
# The service also offers a Tune by Example feature that lets you define custom prompts.
|
40
|
+
# You can also define speaker models to improve the quality of your custom prompts. The
|
41
|
+
# service supports custom prompts only for US English custom models and voices.
|
36
42
|
|
37
43
|
require "concurrent"
|
38
44
|
require "erb"
|
@@ -40,7 +46,6 @@ require "json"
|
|
40
46
|
require "ibm_cloud_sdk_core"
|
41
47
|
require_relative "./common.rb"
|
42
48
|
|
43
|
-
# Module for the Watson APIs
|
44
49
|
module IBMWatson
|
45
50
|
##
|
46
51
|
# The Text to Speech V1 service.
|
@@ -109,14 +114,40 @@ module IBMWatson
|
|
109
114
|
# Get a voice.
|
110
115
|
# Gets information about the specified voice. The information includes the name,
|
111
116
|
# language, gender, and other details about the voice. Specify a customization ID to
|
112
|
-
# obtain information for a custom
|
113
|
-
#
|
114
|
-
#
|
117
|
+
# obtain information for a custom model that is defined for the language of the
|
118
|
+
# specified voice. To list information about all available voices, use the **List
|
119
|
+
# voices** method.
|
115
120
|
#
|
116
121
|
# **See also:** [Listing a specific
|
117
122
|
# voice](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices#listVoice).
|
118
|
-
#
|
119
|
-
#
|
123
|
+
#
|
124
|
+
#
|
125
|
+
# ### Important voice updates
|
126
|
+
#
|
127
|
+
# The service's voices underwent significant change on 2 December 2020.
|
128
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
129
|
+
# instead of concatenative.
|
130
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
131
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
132
|
+
# `ar-MS` identifier instead.
|
133
|
+
# * The standard concatenative voices for the following languages are now
|
134
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
135
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
136
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
137
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
138
|
+
# of the service's neural voices.
|
139
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
140
|
+
# for production use.
|
141
|
+
#
|
142
|
+
# The deprecated voices and features will continue to function for at least one year
|
143
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
144
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
145
|
+
# all voice updates, see the [2 December 2020 service
|
146
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
147
|
+
# in the release notes.
|
148
|
+
# @param voice [String] The voice for which information is to be returned. For more information about
|
149
|
+
# specifying a voice, see **Important voice updates** in the method description.
|
150
|
+
# @param customization_id [String] The customization ID (GUID) of a custom model for which information is to be
|
120
151
|
# returned. You must make the request with credentials for the instance of the
|
121
152
|
# service that owns the custom model. Omit the parameter to see information about
|
122
153
|
# the specified voice with no customization.
|
@@ -211,6 +242,30 @@ module IBMWatson
|
|
211
242
|
# formats](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-audioFormats#audioFormats).
|
212
243
|
#
|
213
244
|
#
|
245
|
+
# ### Important voice updates
|
246
|
+
#
|
247
|
+
# The service's voices underwent significant change on 2 December 2020.
|
248
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
249
|
+
# instead of concatenative.
|
250
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
251
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
252
|
+
# `ar-MS` identifier instead.
|
253
|
+
# * The standard concatenative voices for the following languages are now
|
254
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
255
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
256
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
257
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
258
|
+
# of the service's neural voices.
|
259
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
260
|
+
# for production use.
|
261
|
+
#
|
262
|
+
# The deprecated voices and features will continue to function for at least one year
|
263
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
264
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
265
|
+
# all voice updates, see the [2 December 2020 service
|
266
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
267
|
+
# in the release notes.
|
268
|
+
#
|
214
269
|
# ### Warning messages
|
215
270
|
#
|
216
271
|
# If a request includes invalid query parameters, the service returns a `Warnings`
|
@@ -224,9 +279,10 @@ module IBMWatson
|
|
224
279
|
# the `accept` parameter to specify the audio format. For more information about
|
225
280
|
# specifying an audio format, see **Audio formats (accept types)** in the method
|
226
281
|
# description.
|
227
|
-
# @param voice [String] The voice to use for synthesis.
|
228
|
-
#
|
229
|
-
#
|
282
|
+
# @param voice [String] The voice to use for synthesis. For more information about specifying a voice, see
|
283
|
+
# **Important voice updates** in the method description.
|
284
|
+
# @param customization_id [String] The customization ID (GUID) of a custom model to use for the synthesis. If a
|
285
|
+
# custom model is specified, it works only if it matches the language of the
|
230
286
|
# indicated voice. You must make the request with credentials for the instance of
|
231
287
|
# the service that owns the custom model. Omit the parameter to use the specified
|
232
288
|
# voice with no customization.
|
@@ -271,26 +327,50 @@ module IBMWatson
|
|
271
327
|
# Gets the phonetic pronunciation for the specified word. You can request the
|
272
328
|
# pronunciation for a specific format. You can also request the pronunciation for a
|
273
329
|
# specific voice to see the default translation for the language of that voice or
|
274
|
-
# for a specific custom
|
275
|
-
#
|
276
|
-
# **Note:** This method is currently a beta release.
|
330
|
+
# for a specific custom model to see the translation for that model.
|
277
331
|
#
|
278
332
|
# **See also:** [Querying a word from a
|
279
333
|
# language](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsQueryLanguage).
|
334
|
+
#
|
335
|
+
#
|
336
|
+
# ### Important voice updates
|
337
|
+
#
|
338
|
+
# The service's voices underwent significant change on 2 December 2020.
|
339
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
340
|
+
# instead of concatenative.
|
341
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
342
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
343
|
+
# `ar-MS` identifier instead.
|
344
|
+
# * The standard concatenative voices for the following languages are now
|
345
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
346
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
347
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
348
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
349
|
+
# of the service's neural voices.
|
350
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
351
|
+
# for production use.
|
352
|
+
#
|
353
|
+
# The deprecated voices and features will continue to function for at least one year
|
354
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
355
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
356
|
+
# all voice updates, see the [2 December 2020 service
|
357
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
358
|
+
# in the release notes.
|
280
359
|
# @param text [String] The word for which the pronunciation is requested.
|
281
360
|
# @param voice [String] A voice that specifies the language in which the pronunciation is to be returned.
|
282
361
|
# All voices for the same language (for example, `en-US`) return the same
|
283
|
-
# translation.
|
362
|
+
# translation. For more information about specifying a voice, see **Important voice
|
363
|
+
# updates** in the method description.
|
284
364
|
# @param format [String] The phoneme format in which to return the pronunciation. The Arabic, Chinese,
|
285
|
-
# Dutch, and Korean languages support only IPA. Omit the
|
286
|
-
# pronunciation in the default format.
|
287
|
-
# @param customization_id [String] The customization ID (GUID) of a custom
|
288
|
-
#
|
289
|
-
#
|
290
|
-
#
|
291
|
-
#
|
292
|
-
#
|
293
|
-
#
|
365
|
+
# Dutch, Australian English, and Korean languages support only IPA. Omit the
|
366
|
+
# parameter to obtain the pronunciation in the default format.
|
367
|
+
# @param customization_id [String] The customization ID (GUID) of a custom model for which the pronunciation is to be
|
368
|
+
# returned. The language of a specified custom model must match the language of the
|
369
|
+
# specified voice. If the word is not defined in the specified custom model, the
|
370
|
+
# service returns the default translation for the custom model's language. You must
|
371
|
+
# make the request with credentials for the instance of the service that owns the
|
372
|
+
# custom model. Omit the parameter to see the translation for the specified voice
|
373
|
+
# with no customization.
|
294
374
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
295
375
|
def get_pronunciation(text:, voice: nil, format: nil, customization_id: nil)
|
296
376
|
raise ArgumentError.new("text must be provided") if text.nil?
|
@@ -323,31 +403,54 @@ module IBMWatson
|
|
323
403
|
#########################
|
324
404
|
|
325
405
|
##
|
326
|
-
# @!method
|
406
|
+
# @!method create_custom_model(name:, language: nil, description: nil)
|
327
407
|
# Create a custom model.
|
328
|
-
# Creates a new empty custom
|
408
|
+
# Creates a new empty custom model. You must specify a name for the new custom
|
329
409
|
# model. You can optionally specify the language and a description for the new
|
330
410
|
# model. The model is owned by the instance of the service whose credentials are
|
331
411
|
# used to create it.
|
332
412
|
#
|
333
|
-
# **Note:** This method is currently a beta release.
|
334
|
-
#
|
335
413
|
# **See also:** [Creating a custom
|
336
414
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsCreate).
|
337
|
-
#
|
338
|
-
#
|
339
|
-
#
|
340
|
-
#
|
341
|
-
#
|
342
|
-
#
|
343
|
-
#
|
415
|
+
#
|
416
|
+
#
|
417
|
+
# ### Important voice updates
|
418
|
+
#
|
419
|
+
# The service's voices underwent significant change on 2 December 2020.
|
420
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
421
|
+
# instead of concatenative.
|
422
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
423
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
424
|
+
# `ar-MS` identifier instead.
|
425
|
+
# * The standard concatenative voices for the following languages are now
|
426
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
427
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
428
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
429
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
430
|
+
# of the service's neural voices.
|
431
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
432
|
+
# for production use.
|
433
|
+
#
|
434
|
+
# The deprecated voices and features will continue to function for at least one year
|
435
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
436
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
437
|
+
# all voice updates, see the [2 December 2020 service
|
438
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
439
|
+
# in the release notes.
|
440
|
+
# @param name [String] The name of the new custom model.
|
441
|
+
# @param language [String] The language of the new custom model. You create a custom model for a specific
|
442
|
+
# language, not for a specific voice. A custom model can be used with any voice for
|
443
|
+
# its specified language. Omit the parameter to use the default language,
|
444
|
+
# `en-US`. **Note:** The `ar-AR` language identifier cannot be used to create a
|
445
|
+
# custom model. Use the `ar-MS` identifier instead.
|
446
|
+
# @param description [String] A description of the new custom model. Specifying a description is recommended.
|
344
447
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
345
|
-
def
|
448
|
+
def create_custom_model(name:, language: nil, description: nil)
|
346
449
|
raise ArgumentError.new("name must be provided") if name.nil?
|
347
450
|
|
348
451
|
headers = {
|
349
452
|
}
|
350
|
-
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "
|
453
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "create_custom_model")
|
351
454
|
headers.merge!(sdk_headers)
|
352
455
|
|
353
456
|
data = {
|
@@ -369,27 +472,25 @@ module IBMWatson
|
|
369
472
|
end
|
370
473
|
|
371
474
|
##
|
372
|
-
# @!method
|
475
|
+
# @!method list_custom_models(language: nil)
|
373
476
|
# List custom models.
|
374
|
-
# Lists metadata such as the name and description for all custom
|
375
|
-
#
|
376
|
-
#
|
377
|
-
# specific
|
477
|
+
# Lists metadata such as the name and description for all custom models that are
|
478
|
+
# owned by an instance of the service. Specify a language to list the custom models
|
479
|
+
# for that language only. To see the words and prompts in addition to the metadata
|
480
|
+
# for a specific custom model, use the **Get a custom model** method. You must use
|
378
481
|
# credentials for the instance of the service that owns a model to list information
|
379
482
|
# about it.
|
380
483
|
#
|
381
|
-
# **Note:** This method is currently a beta release.
|
382
|
-
#
|
383
484
|
# **See also:** [Querying all custom
|
384
485
|
# models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQueryAll).
|
385
|
-
# @param language [String] The language for which custom
|
386
|
-
#
|
387
|
-
#
|
486
|
+
# @param language [String] The language for which custom models that are owned by the requesting credentials
|
487
|
+
# are to be returned. Omit the parameter to see all custom models that are owned by
|
488
|
+
# the requester.
|
388
489
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
389
|
-
def
|
490
|
+
def list_custom_models(language: nil)
|
390
491
|
headers = {
|
391
492
|
}
|
392
|
-
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "
|
493
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "list_custom_models")
|
393
494
|
headers.merge!(sdk_headers)
|
394
495
|
|
395
496
|
params = {
|
@@ -409,14 +510,14 @@ module IBMWatson
|
|
409
510
|
end
|
410
511
|
|
411
512
|
##
|
412
|
-
# @!method
|
513
|
+
# @!method update_custom_model(customization_id:, name: nil, description: nil, words: nil)
|
413
514
|
# Update a custom model.
|
414
|
-
# Updates information for the specified custom
|
415
|
-
#
|
416
|
-
#
|
417
|
-
#
|
418
|
-
#
|
419
|
-
#
|
515
|
+
# Updates information for the specified custom model. You can update metadata such
|
516
|
+
# as the name and description of the model. You can also update the words in the
|
517
|
+
# model and their translations. Adding a new translation for a word that already
|
518
|
+
# exists in a custom model overwrites the word's existing translation. A custom
|
519
|
+
# model can contain no more than 20,000 entries. You must use credentials for the
|
520
|
+
# instance of the service that owns a model to update it.
|
420
521
|
#
|
421
522
|
# You can define sounds-like or phonetic translations for words. A sounds-like
|
422
523
|
# translation consists of one or more words that, when combined, sound like the
|
@@ -432,8 +533,6 @@ module IBMWatson
|
|
432
533
|
# <code><phoneme alphabet="ibm"
|
433
534
|
# ph="1gAstroEntxrYFXs"></phoneme></code>
|
434
535
|
#
|
435
|
-
# **Note:** This method is currently a beta release.
|
436
|
-
#
|
437
536
|
# **See also:**
|
438
537
|
# * [Updating a custom
|
439
538
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsUpdate)
|
@@ -441,20 +540,20 @@ module IBMWatson
|
|
441
540
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuJapaneseAdd)
|
442
541
|
# * [Understanding
|
443
542
|
# customization](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customIntro#customIntro).
|
444
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
445
|
-
#
|
446
|
-
# @param name [String] A new name for the custom
|
447
|
-
# @param description [String] A new description for the custom
|
543
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
544
|
+
# credentials for the instance of the service that owns the custom model.
|
545
|
+
# @param name [String] A new name for the custom model.
|
546
|
+
# @param description [String] A new description for the custom model.
|
448
547
|
# @param words [Array[Word]] An array of `Word` objects that provides the words and their translations that are
|
449
|
-
# to be added or updated for the custom
|
548
|
+
# to be added or updated for the custom model. Pass an empty array to make no
|
450
549
|
# additions or updates.
|
451
550
|
# @return [nil]
|
452
|
-
def
|
551
|
+
def update_custom_model(customization_id:, name: nil, description: nil, words: nil)
|
453
552
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
454
553
|
|
455
554
|
headers = {
|
456
555
|
}
|
457
|
-
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "
|
556
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "update_custom_model")
|
458
557
|
headers.merge!(sdk_headers)
|
459
558
|
|
460
559
|
data = {
|
@@ -476,26 +575,25 @@ module IBMWatson
|
|
476
575
|
end
|
477
576
|
|
478
577
|
##
|
479
|
-
# @!method
|
578
|
+
# @!method get_custom_model(customization_id:)
|
480
579
|
# Get a custom model.
|
481
|
-
# Gets all information about a specified custom
|
482
|
-
#
|
483
|
-
#
|
484
|
-
#
|
485
|
-
#
|
486
|
-
# **Note:** This method is currently a beta release.
|
580
|
+
# Gets all information about a specified custom model. In addition to metadata such
|
581
|
+
# as the name and description of the custom model, the output includes the words and
|
582
|
+
# their translations that are defined for the model, as well as any prompts that are
|
583
|
+
# defined for the model. To see just the metadata for a model, use the **List custom
|
584
|
+
# models** method.
|
487
585
|
#
|
488
586
|
# **See also:** [Querying a custom
|
489
587
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQuery).
|
490
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
491
|
-
#
|
588
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
589
|
+
# credentials for the instance of the service that owns the custom model.
|
492
590
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
493
|
-
def
|
591
|
+
def get_custom_model(customization_id:)
|
494
592
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
495
593
|
|
496
594
|
headers = {
|
497
595
|
}
|
498
|
-
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "
|
596
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "get_custom_model")
|
499
597
|
headers.merge!(sdk_headers)
|
500
598
|
|
501
599
|
method_url = "/v1/customizations/%s" % [ERB::Util.url_encode(customization_id)]
|
@@ -510,24 +608,22 @@ module IBMWatson
|
|
510
608
|
end
|
511
609
|
|
512
610
|
##
|
513
|
-
# @!method
|
611
|
+
# @!method delete_custom_model(customization_id:)
|
514
612
|
# Delete a custom model.
|
515
|
-
# Deletes the specified custom
|
516
|
-
#
|
517
|
-
#
|
518
|
-
# **Note:** This method is currently a beta release.
|
613
|
+
# Deletes the specified custom model. You must use credentials for the instance of
|
614
|
+
# the service that owns a model to delete it.
|
519
615
|
#
|
520
616
|
# **See also:** [Deleting a custom
|
521
617
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsDelete).
|
522
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
523
|
-
#
|
618
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
619
|
+
# credentials for the instance of the service that owns the custom model.
|
524
620
|
# @return [nil]
|
525
|
-
def
|
621
|
+
def delete_custom_model(customization_id:)
|
526
622
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
527
623
|
|
528
624
|
headers = {
|
529
625
|
}
|
530
|
-
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "
|
626
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "delete_custom_model")
|
531
627
|
headers.merge!(sdk_headers)
|
532
628
|
|
533
629
|
method_url = "/v1/customizations/%s" % [ERB::Util.url_encode(customization_id)]
|
@@ -547,7 +643,7 @@ module IBMWatson
|
|
547
643
|
##
|
548
644
|
# @!method add_words(customization_id:, words:)
|
549
645
|
# Add custom words.
|
550
|
-
# Adds one or more words and their translations to the specified custom
|
646
|
+
# Adds one or more words and their translations to the specified custom model.
|
551
647
|
# Adding a new translation for a word that already exists in a custom model
|
552
648
|
# overwrites the word's existing translation. A custom model can contain no more
|
553
649
|
# than 20,000 entries. You must use credentials for the instance of the service that
|
@@ -567,8 +663,6 @@ module IBMWatson
|
|
567
663
|
# <code><phoneme alphabet="ibm"
|
568
664
|
# ph="1gAstroEntxrYFXs"></phoneme></code>
|
569
665
|
#
|
570
|
-
# **Note:** This method is currently a beta release.
|
571
|
-
#
|
572
666
|
# **See also:**
|
573
667
|
# * [Adding multiple words to a custom
|
574
668
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsAdd)
|
@@ -576,15 +670,15 @@ module IBMWatson
|
|
576
670
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuJapaneseAdd)
|
577
671
|
# * [Understanding
|
578
672
|
# customization](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customIntro#customIntro).
|
579
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
580
|
-
#
|
673
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
674
|
+
# credentials for the instance of the service that owns the custom model.
|
581
675
|
# @param words [Array[Word]] The **Add custom words** method accepts an array of `Word` objects. Each object
|
582
|
-
# provides a word that is to be added or updated for the custom
|
583
|
-
#
|
676
|
+
# provides a word that is to be added or updated for the custom model and the word's
|
677
|
+
# translation.
|
584
678
|
#
|
585
679
|
# The **List custom words** method returns an array of `Word` objects. Each object
|
586
|
-
# shows a word and its translation from the custom
|
587
|
-
#
|
680
|
+
# shows a word and its translation from the custom model. The words are listed in
|
681
|
+
# alphabetical order, with uppercase letters listed before lowercase letters. The
|
588
682
|
# array is empty if the custom model contains no words.
|
589
683
|
# @return [nil]
|
590
684
|
def add_words(customization_id:, words:)
|
@@ -616,17 +710,14 @@ module IBMWatson
|
|
616
710
|
##
|
617
711
|
# @!method list_words(customization_id:)
|
618
712
|
# List custom words.
|
619
|
-
# Lists all of the words and their translations for the specified custom
|
620
|
-
#
|
621
|
-
#
|
622
|
-
# words.
|
623
|
-
#
|
624
|
-
# **Note:** This method is currently a beta release.
|
713
|
+
# Lists all of the words and their translations for the specified custom model. The
|
714
|
+
# output shows the translations as they are defined in the model. You must use
|
715
|
+
# credentials for the instance of the service that owns a model to list its words.
|
625
716
|
#
|
626
717
|
# **See also:** [Querying all words from a custom
|
627
718
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsQueryModel).
|
628
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
629
|
-
#
|
719
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
720
|
+
# credentials for the instance of the service that owns the custom model.
|
630
721
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
631
722
|
def list_words(customization_id:)
|
632
723
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -650,11 +741,11 @@ module IBMWatson
|
|
650
741
|
##
|
651
742
|
# @!method add_word(customization_id:, word:, translation:, part_of_speech: nil)
|
652
743
|
# Add a custom word.
|
653
|
-
# Adds a single word and its translation to the specified custom
|
654
|
-
#
|
655
|
-
#
|
656
|
-
#
|
657
|
-
#
|
744
|
+
# Adds a single word and its translation to the specified custom model. Adding a new
|
745
|
+
# translation for a word that already exists in a custom model overwrites the word's
|
746
|
+
# existing translation. A custom model can contain no more than 20,000 entries. You
|
747
|
+
# must use credentials for the instance of the service that owns a model to add a
|
748
|
+
# word to it.
|
658
749
|
#
|
659
750
|
# You can define sounds-like or phonetic translations for words. A sounds-like
|
660
751
|
# translation consists of one or more words that, when combined, sound like the
|
@@ -670,8 +761,6 @@ module IBMWatson
|
|
670
761
|
# <code><phoneme alphabet="ibm"
|
671
762
|
# ph="1gAstroEntxrYFXs"></phoneme></code>
|
672
763
|
#
|
673
|
-
# **Note:** This method is currently a beta release.
|
674
|
-
#
|
675
764
|
# **See also:**
|
676
765
|
# * [Adding a single word to a custom
|
677
766
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordAdd)
|
@@ -679,14 +768,14 @@ module IBMWatson
|
|
679
768
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuJapaneseAdd)
|
680
769
|
# * [Understanding
|
681
770
|
# customization](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customIntro#customIntro).
|
682
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
683
|
-
#
|
684
|
-
# @param word [String] The word that is to be added or updated for the custom
|
771
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
772
|
+
# credentials for the instance of the service that owns the custom model.
|
773
|
+
# @param word [String] The word that is to be added or updated for the custom model.
|
685
774
|
# @param translation [String] The phonetic or sounds-like translation for the word. A phonetic translation is
|
686
775
|
# based on the SSML format for representing the phonetic string of a word either as
|
687
|
-
# an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch,
|
688
|
-
# Korean languages support only IPA. A sounds-like is one or
|
689
|
-
# combined, sound like the word.
|
776
|
+
# an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch,
|
777
|
+
# Australian English, and Korean languages support only IPA. A sounds-like translation is one or
|
778
|
+
# more words that, when combined, sound like the word.
|
690
779
|
# @param part_of_speech [String] **Japanese only.** The part of speech for the word. The service uses the value to
|
691
780
|
# produce the correct intonation for the word. You can create only a single entry,
|
692
781
|
# with or without a single part of speech, for any word; you cannot create multiple
|
@@ -730,13 +819,11 @@ module IBMWatson
|
|
730
819
|
# shows the translation as it is defined in the model. You must use credentials for
|
731
820
|
# the instance of the service that owns a model to list its words.
|
732
821
|
#
|
733
|
-
# **Note:** This method is currently a beta release.
|
734
|
-
#
|
735
822
|
# **See also:** [Querying a single word from a custom
|
736
823
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordQueryModel).
|
737
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
738
|
-
#
|
739
|
-
# @param word [String] The word that is to be queried from the custom
|
824
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
825
|
+
# credentials for the instance of the service that owns the custom model.
|
826
|
+
# @param word [String] The word that is to be queried from the custom model.
|
740
827
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
741
828
|
def get_word(customization_id:, word:)
|
742
829
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -762,17 +849,14 @@ module IBMWatson
|
|
762
849
|
##
|
763
850
|
# @!method delete_word(customization_id:, word:)
|
764
851
|
# Delete a custom word.
|
765
|
-
# Deletes a single word from the specified custom
|
766
|
-
#
|
767
|
-
#
|
768
|
-
#
|
769
|
-
# **Note:** This method is currently a beta release.
|
852
|
+
# Deletes a single word from the specified custom model. You must use credentials
|
853
|
+
# for the instance of the service that owns a model to delete its words.
|
770
854
|
#
|
771
855
|
# **See also:** [Deleting a word from a custom
|
772
856
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordDelete).
|
773
|
-
# @param customization_id [String] The customization ID (GUID) of the custom
|
774
|
-
#
|
775
|
-
# @param word [String] The word that is to be deleted from the custom
|
857
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
858
|
+
# credentials for the instance of the service that owns the custom model.
|
859
|
+
# @param word [String] The word that is to be deleted from the custom model.
|
776
860
|
# @return [nil]
|
777
861
|
def delete_word(customization_id:, word:)
|
778
862
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -795,6 +879,481 @@ module IBMWatson
|
|
795
879
|
nil
|
796
880
|
end
|
797
881
|
#########################
|
882
|
+
# Custom prompts
|
883
|
+
#########################
|
884
|
+
|
885
|
+
##
|
886
|
+
# @!method list_custom_prompts(customization_id:)
|
887
|
+
# List custom prompts.
|
888
|
+
# Lists information about all custom prompts that are defined for a custom model.
|
889
|
+
# The information includes the prompt ID, prompt text, status, and optional speaker
|
890
|
+
# ID for each prompt of the custom model. You must use credentials for the instance
|
891
|
+
# of the service that owns the custom model. The same information about all of the
|
892
|
+
# prompts for a custom model is also provided by the **Get a custom model** method.
|
893
|
+
# That method provides complete details about a specified custom model, including
|
894
|
+
# its language, owner, custom words, and more.
|
895
|
+
#
|
896
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
897
|
+
# with US English custom models and voices.
|
898
|
+
#
|
899
|
+
# **See also:** [Listing custom
|
900
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
|
901
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
902
|
+
# credentials for the instance of the service that owns the custom model.
|
903
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
904
|
+
def list_custom_prompts(customization_id:)
|
905
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
906
|
+
|
907
|
+
headers = {
|
908
|
+
}
|
909
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "list_custom_prompts")
|
910
|
+
headers.merge!(sdk_headers)
|
911
|
+
|
912
|
+
method_url = "/v1/customizations/%s/prompts" % [ERB::Util.url_encode(customization_id)]
|
913
|
+
|
914
|
+
response = request(
|
915
|
+
method: "GET",
|
916
|
+
url: method_url,
|
917
|
+
headers: headers,
|
918
|
+
accept_json: true
|
919
|
+
)
|
920
|
+
response
|
921
|
+
end
|
922
|
+
|
923
|
+
##
|
924
|
+
# @!method add_custom_prompt(customization_id:, prompt_id:, metadata:, file:, filename: nil)
|
925
|
+
# Add a custom prompt.
|
926
|
+
# Adds a custom prompt to a custom model. A prompt is defined by the text that is to
|
927
|
+
# be spoken, the audio for that text, a unique user-specified ID for the prompt, and
|
928
|
+
# an optional speaker ID. The information is used to generate prosodic data that is
|
929
|
+
# not visible to the user. This data is used by the service to produce the
|
930
|
+
# synthesized audio upon request. You must use credentials for the instance of the
|
931
|
+
# service that owns a custom model to add a prompt to it. You can add a maximum of
|
932
|
+
# 1000 custom prompts to a single custom model.
|
933
|
+
#
|
934
|
+
# It is recommended that you assign meaningful values for prompt IDs. For example, use
|
935
|
+
# `goodbye` to identify a prompt that speaks a farewell message. Prompt IDs must be
|
936
|
+
# unique within a given custom model. You cannot define two prompts with the same
|
937
|
+
# name for the same custom model. If you provide the ID of an existing prompt, the
|
938
|
+
# previously uploaded prompt is replaced by the new information. The existing prompt
|
939
|
+
# is reprocessed by using the new text and audio and, if provided, new speaker
|
940
|
+
# model, and the prosody data associated with the prompt is updated.
|
941
|
+
#
|
942
|
+
# The quality of a prompt is undefined if the language of a prompt does not match
|
943
|
+
# the language of its custom model. This is consistent with any text or SSML that is
|
944
|
+
# specified for a speech synthesis request. The service makes a best-effort attempt
|
945
|
+
# to render the specified text for the prompt; it does not validate that the
|
946
|
+
# language of the text matches the language of the model.
|
947
|
+
#
|
948
|
+
# Adding a prompt is an asynchronous operation. Although it accepts less audio than
|
949
|
+
# speaker enrollment, the service must align the audio with the provided text. The
|
950
|
+
# time that it takes to process a prompt depends on the prompt itself. The
|
951
|
+
# processing time for a reasonably sized prompt generally matches the length of the
|
952
|
+
# audio (for example, it takes 20 seconds to process a 20-second prompt).
|
953
|
+
#
|
954
|
+
# For shorter prompts, you can wait for a reasonable amount of time and then check
|
955
|
+
# the status of the prompt with the **Get a custom prompt** method. For longer
|
956
|
+
# prompts, consider using that method to poll the service every few seconds to
|
957
|
+
# determine when the prompt becomes available. No prompt can be used for speech
|
958
|
+
# synthesis if it is in the `processing` or `failed` state. Only prompts that are in
|
959
|
+
# the `available` state can be used for speech synthesis.
|
960
|
+
#
|
961
|
+
# When it processes a request, the service attempts to align the text and the audio
|
962
|
+
# that are provided for the prompt. The text that is passed with a prompt must match
|
963
|
+
# the spoken audio as closely as possible. Optimally, the text and audio match
|
964
|
+
# exactly. The service does its best to align the specified text with the audio, and
|
965
|
+
# it can often compensate for mismatches between the two. But if the service cannot
|
966
|
+
# effectively align the text and the audio, possibly because the magnitude of
|
967
|
+
# mismatches between the two is too great, processing of the prompt fails.
|
968
|
+
#
|
969
|
+
# ### Evaluating a prompt
|
970
|
+
#
|
971
|
+
# Always listen to and evaluate a prompt to determine its quality before using it
|
972
|
+
# in production. To evaluate a prompt, include only the single prompt in a speech
|
973
|
+
# synthesis request by using the following SSML extension, in this case for a prompt
|
974
|
+
# whose ID is `goodbye`:
|
975
|
+
#
|
976
|
+
# `<ibm:prompt id="goodbye"/>`
|
977
|
+
#
|
978
|
+
# In some cases, you might need to rerecord and resubmit a prompt as many as five
|
979
|
+
# times to address the following possible problems:
|
980
|
+
# * The service might fail to detect a mismatch between the prompt's text and audio.
|
981
|
+
# The longer the prompt, the greater the chance for misalignment between its text
|
982
|
+
# and audio. Therefore, multiple shorter prompts are preferable to a single long
|
983
|
+
# prompt.
|
984
|
+
# * The text of a prompt might include a word that the service does not recognize.
|
985
|
+
# In this case, you can create a custom word and pronunciation pair to tell the
|
986
|
+
# service how to pronounce the word. You must then re-create the prompt.
|
987
|
+
# * The quality of the input audio might be insufficient or the service's processing
|
988
|
+
# of the audio might fail to detect the intended prosody. Submitting new audio for
|
989
|
+
# the prompt can correct these issues.
|
990
|
+
#
|
991
|
+
# If a prompt that is created without a speaker ID does not adequately reflect the
|
992
|
+
# intended prosody, enrolling the speaker and providing a speaker ID for the prompt
|
993
|
+
# is one recommended means of potentially improving the quality of the prompt. This
|
994
|
+
# is especially important for shorter prompts such as "good-bye" or "thank you,"
|
995
|
+
# where less audio data makes it more difficult to match the prosody of the speaker.
|
996
|
+
#
|
997
|
+
#
|
998
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
999
|
+
# with US English custom models and voices.
|
1000
|
+
#
|
1001
|
+
# **See also:**
|
1002
|
+
# * [Add a custom
|
1003
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-add-prompt)
|
1004
|
+
# * [Evaluate a custom
|
1005
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-evaluate-prompt)
|
1006
|
+
# * [Rules for creating custom
|
1007
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-prompts).
|
1008
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1009
|
+
# credentials for the instance of the service that owns the custom model.
|
1010
|
+
# @param prompt_id [String] The identifier of the prompt that is to be added to the custom model:
|
1011
|
+
# * Include a maximum of 49 characters in the ID.
|
1012
|
+
# * Include only alphanumeric characters and `_` (underscores) in the ID.
|
1013
|
+
# * Do not include XML sensitive characters (double quotes, single quotes,
|
1014
|
+
# ampersands, angle brackets, and slashes) in the ID.
|
1015
|
+
# * To add a new prompt, the ID must be unique for the specified custom model.
|
1016
|
+
# Otherwise, the new information for the prompt overwrites the existing prompt that
|
1017
|
+
# has that ID.
|
1018
|
+
# @param metadata [PromptMetadata] Information about the prompt that is to be added to a custom model. The following
|
1019
|
+
# example of a `PromptMetadata` object includes both the required prompt text and an
|
1020
|
+
# optional speaker model ID:
|
1021
|
+
#
|
1022
|
+
# `{ "prompt_text": "Thank you and good-bye!", "speaker_id":
|
1023
|
+
# "823068b2-ed4e-11ea-b6e0-7b6456aa95cc" }`.
|
1024
|
+
# @param file [File] An audio file that speaks the text of the prompt with intonation and prosody that
|
1025
|
+
# matches how you would like the prompt to be spoken.
|
1026
|
+
# * The prompt audio must be in WAV format and must have a minimum sampling rate of
|
1027
|
+
# 16 kHz. The service accepts audio with higher sampling rates. The service
|
1028
|
+
# transcodes all audio to 16 kHz before processing it.
|
1029
|
+
# * The length of the prompt audio is limited to 30 seconds.
|
1030
|
+
# @param filename [String] The filename for file.
|
1031
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1032
|
+
def add_custom_prompt(customization_id:, prompt_id:, metadata:, file:, filename: nil)
|
1033
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
1034
|
+
|
1035
|
+
raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?
|
1036
|
+
|
1037
|
+
raise ArgumentError.new("metadata must be provided") if metadata.nil?
|
1038
|
+
|
1039
|
+
raise ArgumentError.new("file must be provided") if file.nil?
|
1040
|
+
|
1041
|
+
headers = {
|
1042
|
+
}
|
1043
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "add_custom_prompt")
|
1044
|
+
headers.merge!(sdk_headers)
|
1045
|
+
|
1046
|
+
form_data = {}
|
1047
|
+
|
1048
|
+
form_data[:metadata] = HTTP::FormData::Part.new(metadata.to_s, content_type: "application/json")
|
1049
|
+
|
1050
|
+
unless file.instance_of?(StringIO) || file.instance_of?(File)
|
1051
|
+
file = file.respond_to?(:to_json) ? StringIO.new(file.to_json) : StringIO.new(file)
|
1052
|
+
end
|
1053
|
+
filename = file.path if filename.nil? && file.respond_to?(:path)
|
1054
|
+
form_data[:file] = HTTP::FormData::File.new(file, content_type: "audio/wav", filename: filename)
|
1055
|
+
|
1056
|
+
method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]
|
1057
|
+
|
1058
|
+
response = request(
|
1059
|
+
method: "POST",
|
1060
|
+
url: method_url,
|
1061
|
+
headers: headers,
|
1062
|
+
form: form_data,
|
1063
|
+
accept_json: true
|
1064
|
+
)
|
1065
|
+
response
|
1066
|
+
end
|
1067
|
+
|
1068
|
+
##
|
1069
|
+
# @!method get_custom_prompt(customization_id:, prompt_id:)
|
1070
|
+
# Get a custom prompt.
|
1071
|
+
# Gets information about a specified custom prompt for a specified custom model. The
|
1072
|
+
# information includes the prompt ID, prompt text, status, and optional speaker ID
|
1073
|
+
# for the specified prompt. You must use credentials for the instance of
|
1074
|
+
# the service that owns the custom model.
|
1075
|
+
#
|
1076
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
1077
|
+
# with US English custom models and voices.
|
1078
|
+
#
|
1079
|
+
# **See also:** [Listing custom
|
1080
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
|
1081
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1082
|
+
# credentials for the instance of the service that owns the custom model.
|
1083
|
+
# @param prompt_id [String] The identifier (name) of the prompt.
|
1084
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1085
|
+
def get_custom_prompt(customization_id:, prompt_id:)
|
1086
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
1087
|
+
|
1088
|
+
raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?
|
1089
|
+
|
1090
|
+
headers = {
|
1091
|
+
}
|
1092
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "get_custom_prompt")
|
1093
|
+
headers.merge!(sdk_headers)
|
1094
|
+
|
1095
|
+
method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]
|
1096
|
+
|
1097
|
+
response = request(
|
1098
|
+
method: "GET",
|
1099
|
+
url: method_url,
|
1100
|
+
headers: headers,
|
1101
|
+
accept_json: true
|
1102
|
+
)
|
1103
|
+
response
|
1104
|
+
end
|
1105
|
+
|
1106
|
+
##
|
1107
|
+
# @!method delete_custom_prompt(customization_id:, prompt_id:)
|
1108
|
+
# Delete a custom prompt.
|
1109
|
+
# Deletes an existing custom prompt from a custom model. The service deletes the
|
1110
|
+
# prompt with the specified ID. You must use credentials for the instance of the
|
1111
|
+
# service that owns the custom model from which the prompt is to be deleted.
|
1112
|
+
#
|
1113
|
+
# **Caution:** Deleting a custom prompt elicits a 400 response code from synthesis
|
1114
|
+
# requests that attempt to use the prompt. Make sure that you do not attempt to use
|
1115
|
+
# a deleted prompt in a production application.
|
1116
|
+
#
|
1117
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
1118
|
+
# with US English custom models and voices.
|
1119
|
+
#
|
1120
|
+
# **See also:** [Deleting a custom
|
1121
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-delete).
|
1122
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1123
|
+
# credentials for the instance of the service that owns the custom model.
|
1124
|
+
# @param prompt_id [String] The identifier (name) of the prompt that is to be deleted.
|
1125
|
+
# @return [nil]
|
1126
|
+
def delete_custom_prompt(customization_id:, prompt_id:)
|
1127
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
1128
|
+
|
1129
|
+
raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?
|
1130
|
+
|
1131
|
+
headers = {
|
1132
|
+
}
|
1133
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "delete_custom_prompt")
|
1134
|
+
headers.merge!(sdk_headers)
|
1135
|
+
|
1136
|
+
method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]
|
1137
|
+
|
1138
|
+
request(
|
1139
|
+
method: "DELETE",
|
1140
|
+
url: method_url,
|
1141
|
+
headers: headers,
|
1142
|
+
accept_json: false
|
1143
|
+
)
|
1144
|
+
nil
|
1145
|
+
end
|
1146
|
+
#########################
|
1147
|
+
# Speaker models
|
1148
|
+
#########################
|
1149
|
+
|
1150
|
+
##
|
1151
|
+
# @!method list_speaker_models
|
1152
|
+
# List speaker models.
|
1153
|
+
# Lists information about all speaker models that are defined for a service
|
1154
|
+
# instance. The information includes the speaker ID and speaker name of each defined
|
1155
|
+
# speaker. You must use credentials for the instance of a service to list its
|
1156
|
+
# speakers.
|
1157
|
+
#
|
1158
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1159
|
+
# functionality that is supported only for use with US English custom models and
|
1160
|
+
# voices.
|
1161
|
+
#
|
1162
|
+
# **See also:** [Listing speaker
|
1163
|
+
# models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list).
|
1164
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1165
|
+
def list_speaker_models
|
1166
|
+
headers = {
|
1167
|
+
}
|
1168
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "list_speaker_models")
|
1169
|
+
headers.merge!(sdk_headers)
|
1170
|
+
|
1171
|
+
method_url = "/v1/speakers"
|
1172
|
+
|
1173
|
+
response = request(
|
1174
|
+
method: "GET",
|
1175
|
+
url: method_url,
|
1176
|
+
headers: headers,
|
1177
|
+
accept_json: true
|
1178
|
+
)
|
1179
|
+
response
|
1180
|
+
end
|
1181
|
+
|
1182
|
+
##
|
1183
|
+
# @!method create_speaker_model(speaker_name:, audio:)
|
1184
|
+
# Create a speaker model.
|
1185
|
+
# Creates a new speaker model, which is an optional enrollment token for users who
|
1186
|
+
# are to add prompts to custom models. A speaker model contains information about a
|
1187
|
+
# user's voice. The service extracts this information from a WAV audio sample that
|
1188
|
+
# you pass as the body of the request. Associating a speaker model with a prompt is
|
1189
|
+
# optional, but the information that is extracted from the speaker model helps the
|
1190
|
+
# service learn about the speaker's voice.
|
1191
|
+
#
|
1192
|
+
# A speaker model can make an appreciable difference in the quality of prompts,
|
1193
|
+
# especially short prompts with relatively little audio, that are associated with
|
1194
|
+
# that speaker. A speaker model can help the service produce a prompt with more
|
1195
|
+
# confidence; the lack of a speaker model can potentially compromise the quality of
|
1196
|
+
# a prompt.
|
1197
|
+
#
|
1198
|
+
# The gender of the speaker who creates a speaker model does not need to match the
|
1199
|
+
# gender of a voice that is used with prompts that are associated with that speaker
|
1200
|
+
# model. For example, a speaker model that is created by a male speaker can be
|
1201
|
+
# associated with prompts that are spoken by female voices.
|
1202
|
+
#
|
1203
|
+
# You create a speaker model for a given instance of the service. The new speaker
|
1204
|
+
# model is owned by the service instance whose credentials are used to create it.
|
1205
|
+
# That same speaker can then be used to create prompts for all custom models within
|
1206
|
+
# that service instance. No language is associated with a speaker model, but each
|
1207
|
+
# custom model has a single specified language. You can add prompts only to US
|
1208
|
+
# English models.
|
1209
|
+
#
|
1210
|
+
# You specify a name for the speaker when you create it. The name must be unique
|
1211
|
+
# among all speaker names for the owning service instance. To re-create a speaker
|
1212
|
+
# model for an existing speaker name, you must first delete the existing speaker
|
1213
|
+
# model that has that name.
|
1214
|
+
#
|
1215
|
+
# Speaker enrollment is a synchronous operation. Although it accepts more audio data
|
1216
|
+
# than a prompt, the process of adding a speaker is very fast. The service simply
|
1217
|
+
# extracts information about the speaker's voice from the audio. Unlike prompts,
|
1218
|
+
# speaker models neither need nor accept a transcription of the audio. When the call
|
1219
|
+
# returns, the audio is fully processed and the speaker enrollment is complete.
|
1220
|
+
#
|
1221
|
+
# The service returns a speaker ID with the request. A speaker ID is a globally unique
|
1222
|
+
# identifier (GUID) that you use to identify the speaker in subsequent requests to
|
1223
|
+
# the service.
|
1224
|
+
#
|
1225
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1226
|
+
# functionality that is supported only for use with US English custom models and
|
1227
|
+
# voices.
|
1228
|
+
#
|
1229
|
+
# **See also:**
|
1230
|
+
# * [Create a speaker
|
1231
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-speaker-model)
|
1232
|
+
# * [Rules for creating speaker
|
1233
|
+
# models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-speakers).
|
1234
|
+
# @param speaker_name [String] The name of the speaker that is to be added to the service instance.
|
1235
|
+
# * Include a maximum of 49 characters in the name.
|
1236
|
+
# * Include only alphanumeric characters and `_` (underscores) in the name.
|
1237
|
+
# * Do not include XML sensitive characters (double quotes, single quotes,
|
1238
|
+
# ampersands, angle brackets, and slashes) in the name.
|
1239
|
+
# * Do not use the name of an existing speaker that is already defined for the
|
1240
|
+
# service instance.
|
1241
|
+
# @param audio [File] An enrollment audio file that contains a sample of the speaker's voice.
|
1242
|
+
# * The enrollment audio must be in WAV format and must have a minimum sampling rate
|
1243
|
+
# of 16 kHz. The service accepts audio with higher sampling rates. It transcodes all
|
1244
|
+
# audio to 16 kHz before processing it.
|
1245
|
+
# * The length of the enrollment audio is limited to 1 minute. Speaking one or two
|
1246
|
+
# paragraphs of text that include five to ten sentences is recommended.
|
1247
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1248
|
+
def create_speaker_model(speaker_name:, audio:)
|
1249
|
+
raise ArgumentError.new("speaker_name must be provided") if speaker_name.nil?
|
1250
|
+
|
1251
|
+
raise ArgumentError.new("audio must be provided") if audio.nil?
|
1252
|
+
|
1253
|
+
headers = {
|
1254
|
+
}
|
1255
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "create_speaker_model")
|
1256
|
+
headers.merge!(sdk_headers)
|
1257
|
+
|
1258
|
+
params = {
|
1259
|
+
"speaker_name" => speaker_name
|
1260
|
+
}
|
1261
|
+
|
1262
|
+
data = audio
|
1263
|
+
headers["Content-Type"] = "audio/wav"
|
1264
|
+
|
1265
|
+
method_url = "/v1/speakers"
|
1266
|
+
|
1267
|
+
response = request(
|
1268
|
+
method: "POST",
|
1269
|
+
url: method_url,
|
1270
|
+
headers: headers,
|
1271
|
+
params: params,
|
1272
|
+
data: data,
|
1273
|
+
accept_json: true
|
1274
|
+
)
|
1275
|
+
response
|
1276
|
+
end
|
1277
|
+
|
1278
|
+
##
|
1279
|
+
# @!method get_speaker_model(speaker_id:)
|
1280
|
+
# Get a speaker model.
|
1281
|
+
# Gets information about all prompts that are defined by a specified speaker for all
|
1282
|
+
# custom models that are owned by a service instance. The information is grouped by
|
1283
|
+
# the customization IDs of the custom models. For each custom model, the information
|
1284
|
+
# lists information about each prompt that is defined for that custom model by the
|
1285
|
+
# speaker. You must use credentials for the instance of the service that owns a
|
1286
|
+
# speaker model to list its prompts.
|
1287
|
+
#
|
1288
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1289
|
+
# functionality that is supported only for use with US English custom models and
|
1290
|
+
# voices.
|
1291
|
+
#
|
1292
|
+
# **See also:** [Listing the custom prompts for a speaker
|
1293
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list-prompts).
|
1294
|
+
# @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
|
1295
|
+
# credentials for the instance of the service that owns the speaker model.
|
1296
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1297
|
+
def get_speaker_model(speaker_id:)
|
1298
|
+
raise ArgumentError.new("speaker_id must be provided") if speaker_id.nil?
|
1299
|
+
|
1300
|
+
headers = {
|
1301
|
+
}
|
1302
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "get_speaker_model")
|
1303
|
+
headers.merge!(sdk_headers)
|
1304
|
+
|
1305
|
+
method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)]
|
1306
|
+
|
1307
|
+
response = request(
|
1308
|
+
method: "GET",
|
1309
|
+
url: method_url,
|
1310
|
+
headers: headers,
|
1311
|
+
accept_json: true
|
1312
|
+
)
|
1313
|
+
response
|
1314
|
+
end
|
1315
|
+
|
1316
|
+
##
|
1317
|
+
# @!method delete_speaker_model(speaker_id:)
|
1318
|
+
# Delete a speaker model.
|
1319
|
+
# Deletes an existing speaker model from the service instance. The service deletes
|
1320
|
+
# the enrolled speaker with the specified speaker ID. You must use credentials for
|
1321
|
+
# the instance of the service that owns a speaker model to delete the speaker.
|
1322
|
+
#
|
1323
|
+
# Any prompts that are associated with the deleted speaker are not affected by the
|
1324
|
+
# speaker's deletion. The prosodic data that defines the quality of a prompt is
|
1325
|
+
# established when the prompt is created. A prompt is static and remains unaffected
|
1326
|
+
# by deletion of its associated speaker. However, the prompt cannot be resubmitted
|
1327
|
+
# or updated with its original speaker once that speaker is deleted.
|
1328
|
+
#
|
1329
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1330
|
+
# functionality that is supported only for use with US English custom models and
|
1331
|
+
# voices.
|
1332
|
+
#
|
1333
|
+
# **See also:** [Deleting a speaker
|
1334
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-delete).
|
1335
|
+
# @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
|
1336
|
+
# credentials for the instance of the service that owns the speaker model.
|
1337
|
+
# @return [nil]
|
1338
|
+
def delete_speaker_model(speaker_id:)
|
1339
|
+
raise ArgumentError.new("speaker_id must be provided") if speaker_id.nil?
|
1340
|
+
|
1341
|
+
headers = {
|
1342
|
+
}
|
1343
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "delete_speaker_model")
|
1344
|
+
headers.merge!(sdk_headers)
|
1345
|
+
|
1346
|
+
method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)]
|
1347
|
+
|
1348
|
+
request(
|
1349
|
+
method: "DELETE",
|
1350
|
+
url: method_url,
|
1351
|
+
headers: headers,
|
1352
|
+
accept_json: false
|
1353
|
+
)
|
1354
|
+
nil
|
1355
|
+
end
|
1356
|
+
#########################
|
798
1357
|
# User data
|
799
1358
|
#########################
|
800
1359
|
|
@@ -811,7 +1370,7 @@ module IBMWatson
|
|
811
1370
|
#
|
812
1371
|
# **Note:** If you delete an instance of the service from the service console, all
|
813
1372
|
# data associated with that service instance is automatically deleted. This includes
|
814
|
-
# all custom
|
1373
|
+
# all custom models and word/translation pairs, and all data related to speech
|
815
1374
|
# synthesis requests.
|
816
1375
|
#
|
817
1376
|
# **See also:** [Information
|