ibm_watson 2.0.2 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/ibm_watson/assistant_v1.rb +111 -77
- data/lib/ibm_watson/assistant_v2.rb +83 -59
- data/lib/ibm_watson/compare_comply_v1.rb +11 -4
- data/lib/ibm_watson/discovery_v1.rb +2 -3
- data/lib/ibm_watson/discovery_v2.rb +97 -7
- data/lib/ibm_watson/language_translator_v3.rb +1 -2
- data/lib/ibm_watson/natural_language_classifier_v1.rb +1 -2
- data/lib/ibm_watson/natural_language_understanding_v1.rb +692 -3
- data/lib/ibm_watson/personality_insights_v3.rb +13 -11
- data/lib/ibm_watson/speech_to_text_v1.rb +257 -106
- data/lib/ibm_watson/text_to_speech_v1.rb +601 -19
- data/lib/ibm_watson/tone_analyzer_v3.rb +1 -2
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +1 -2
- data/lib/ibm_watson/visual_recognition_v4.rb +11 -8
- data/test/integration/test_discovery_v2.rb +15 -0
- data/test/integration/test_natural_language_understanding_v1.rb +134 -1
- data/test/integration/test_text_to_speech_v1.rb +57 -0
- data/test/unit/test_discovery_v2.rb +29 -0
- data/test/unit/test_natural_language_understanding_v1.rb +231 -0
- data/test/unit/test_text_to_speech_v1.rb +145 -0
- metadata +3 -3
@@ -14,7 +14,7 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
#
|
17
|
-
# IBM OpenAPI SDK Code Generator Version: 3.
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
|
18
18
|
#
|
19
19
|
# The IBM Watson™ Text to Speech service provides APIs that use IBM's
|
20
20
|
# speech-synthesis capabilities to synthesize text into natural-sounding speech in a
|
@@ -33,8 +33,12 @@
|
|
33
33
|
# that, when combined, sound like the word. A phonetic translation is based on the SSML
|
34
34
|
# phoneme format for representing a word. You can specify a phonetic translation in
|
35
35
|
# standard International Phonetic Alphabet (IPA) representation or in the proprietary IBM
|
36
|
-
# Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch,
|
37
|
-
# support only IPA.
|
36
|
+
# Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch, Australian English,
|
37
|
+
# and Korean languages support only IPA.
|
38
|
+
#
|
39
|
+
# The service also offers a Tune by Example feature that lets you define custom prompts.
|
40
|
+
# You can also define speaker models to improve the quality of your custom prompts. The
|
41
|
+
# service supports custom prompts only for US English custom models and voices.
|
38
42
|
|
39
43
|
require "concurrent"
|
40
44
|
require "erb"
|
@@ -42,7 +46,6 @@ require "json"
|
|
42
46
|
require "ibm_cloud_sdk_core"
|
43
47
|
require_relative "./common.rb"
|
44
48
|
|
45
|
-
# Module for the Watson APIs
|
46
49
|
module IBMWatson
|
47
50
|
##
|
48
51
|
# The Text to Speech V1 service.
|
@@ -117,7 +120,33 @@ module IBMWatson
|
|
117
120
|
#
|
118
121
|
# **See also:** [Listing a specific
|
119
122
|
# voice](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices#listVoice).
|
120
|
-
#
|
123
|
+
#
|
124
|
+
#
|
125
|
+
# ### Important voice updates
|
126
|
+
#
|
127
|
+
# The service's voices underwent significant change on 2 December 2020.
|
128
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
129
|
+
# instead of concatenative.
|
130
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
131
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
132
|
+
# `ar-MS` identifier instead.
|
133
|
+
# * The standard concatenative voices for the following languages are now
|
134
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
135
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
136
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
137
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
138
|
+
# of the service's neural voices.
|
139
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
140
|
+
# for production use.
|
141
|
+
#
|
142
|
+
# The deprecated voices and features will continue to function for at least one year
|
143
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
144
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
145
|
+
# all voice updates, see the [2 December 2020 service
|
146
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
147
|
+
# in the release notes.
|
148
|
+
# @param voice [String] The voice for which information is to be returned. For more information about
|
149
|
+
# specifying a voice, see **Important voice updates** in the method description.
|
121
150
|
# @param customization_id [String] The customization ID (GUID) of a custom model for which information is to be
|
122
151
|
# returned. You must make the request with credentials for the instance of the
|
123
152
|
# service that owns the custom model. Omit the parameter to see information about
|
@@ -213,6 +242,30 @@ module IBMWatson
|
|
213
242
|
# formats](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-audioFormats#audioFormats).
|
214
243
|
#
|
215
244
|
#
|
245
|
+
# ### Important voice updates
|
246
|
+
#
|
247
|
+
# The service's voices underwent significant change on 2 December 2020.
|
248
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
249
|
+
# instead of concatenative.
|
250
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
251
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
252
|
+
# `ar-MS` identifier instead.
|
253
|
+
# * The standard concatenative voices for the following languages are now
|
254
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
255
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
256
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
257
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
258
|
+
# of the service's neural voices.
|
259
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
260
|
+
# for production use.
|
261
|
+
#
|
262
|
+
# The deprecated voices and features will continue to function for at least one year
|
263
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
264
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
265
|
+
# all voice updates, see the [2 December 2020 service
|
266
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
267
|
+
# in the release notes.
|
268
|
+
#
|
216
269
|
# ### Warning messages
|
217
270
|
#
|
218
271
|
# If a request includes invalid query parameters, the service returns a `Warnings`
|
@@ -226,7 +279,8 @@ module IBMWatson
|
|
226
279
|
# the `accept` parameter to specify the audio format. For more information about
|
227
280
|
# specifying an audio format, see **Audio formats (accept types)** in the method
|
228
281
|
# description.
|
229
|
-
# @param voice [String] The voice to use for synthesis.
|
282
|
+
# @param voice [String] The voice to use for synthesis. For more information about specifying a voice, see
|
283
|
+
# **Important voice updates** in the method description.
|
230
284
|
# @param customization_id [String] The customization ID (GUID) of a custom model to use for the synthesis. If a
|
231
285
|
# custom model is specified, it works only if it matches the language of the
|
232
286
|
# indicated voice. You must make the request with credentials for the instance of
|
@@ -277,13 +331,39 @@ module IBMWatson
|
|
277
331
|
#
|
278
332
|
# **See also:** [Querying a word from a
|
279
333
|
# language](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsQueryLanguage).
|
334
|
+
#
|
335
|
+
#
|
336
|
+
# ### Important voice updates
|
337
|
+
#
|
338
|
+
# The service's voices underwent significant change on 2 December 2020.
|
339
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
340
|
+
# instead of concatenative.
|
341
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
342
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
343
|
+
# `ar-MS` identifier instead.
|
344
|
+
# * The standard concatenative voices for the following languages are now
|
345
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
346
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
347
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
348
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
349
|
+
# of the service's neural voices.
|
350
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
351
|
+
# for production use.
|
352
|
+
#
|
353
|
+
# The deprecated voices and features will continue to function for at least one year
|
354
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
355
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
356
|
+
# all voice updates, see the [2 December 2020 service
|
357
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
358
|
+
# in the release notes.
|
280
359
|
# @param text [String] The word for which the pronunciation is requested.
|
281
360
|
# @param voice [String] A voice that specifies the language in which the pronunciation is to be returned.
|
282
361
|
# All voices for the same language (for example, `en-US`) return the same
|
283
|
-
# translation.
|
362
|
+
# translation. For more information about specifying a voice, see **Important voice
|
363
|
+
# updates** in the method description.
|
284
364
|
# @param format [String] The phoneme format in which to return the pronunciation. The Arabic, Chinese,
|
285
|
-
# Dutch, and Korean languages support only IPA. Omit the
|
286
|
-
# pronunciation in the default format.
|
365
|
+
# Dutch, Australian English, and Korean languages support only IPA. Omit the
|
366
|
+
# parameter to obtain the pronunciation in the default format.
|
287
367
|
# @param customization_id [String] The customization ID (GUID) of a custom model for which the pronunciation is to be
|
288
368
|
# returned. The language of a specified custom model must match the language of the
|
289
369
|
# specified voice. If the word is not defined in the specified custom model, the
|
@@ -332,11 +412,37 @@ module IBMWatson
|
|
332
412
|
#
|
333
413
|
# **See also:** [Creating a custom
|
334
414
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsCreate).
|
415
|
+
#
|
416
|
+
#
|
417
|
+
# ### Important voice updates
|
418
|
+
#
|
419
|
+
# The service's voices underwent significant change on 2 December 2020.
|
420
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
421
|
+
# instead of concatenative.
|
422
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use the `ar-MS_OmarVoice` voice instead.
|
423
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
424
|
+
# `ar-MS` identifier instead.
|
425
|
+
# * The standard concatenative voices for the following languages are now
|
426
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
427
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
428
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
429
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
430
|
+
# of the service's neural voices.
|
431
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
432
|
+
# for production use.
|
433
|
+
#
|
434
|
+
# The deprecated voices and features will continue to function for at least one year
|
435
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
436
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
437
|
+
# all voice updates, see the [2 December 2020 service
|
438
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
439
|
+
# in the release notes.
|
335
440
|
# @param name [String] The name of the new custom model.
|
336
441
|
# @param language [String] The language of the new custom model. You create a custom model for a specific
|
337
|
-
# language, not for a specific voice. A custom model can be used with any voice
|
338
|
-
#
|
339
|
-
#
|
442
|
+
# language, not for a specific voice. A custom model can be used with any voice for
|
443
|
+
# its specified language. Omit the parameter to use the default language,
|
444
|
+
# `en-US`. **Note:** The `ar-AR` language identifier cannot be used to create a
|
445
|
+
# custom model. Use the `ar-MS` identifier instead.
|
340
446
|
# @param description [String] A description of the new custom model. Specifying a description is recommended.
|
341
447
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
342
448
|
def create_custom_model(name:, language: nil, description: nil)
|
@@ -370,8 +476,8 @@ module IBMWatson
|
|
370
476
|
# List custom models.
|
371
477
|
# Lists metadata such as the name and description for all custom models that are
|
372
478
|
# owned by an instance of the service. Specify a language to list the custom models
|
373
|
-
# for that language only. To see the words in addition to the metadata
|
374
|
-
# specific custom model, use the **
|
479
|
+
# for that language only. To see the words and prompts in addition to the metadata
|
480
|
+
# for a specific custom model, use the **Get a custom model** method. You must use
|
375
481
|
# credentials for the instance of the service that owns a model to list information
|
376
482
|
# about it.
|
377
483
|
#
|
@@ -473,8 +579,9 @@ module IBMWatson
|
|
473
579
|
# Get a custom model.
|
474
580
|
# Gets all information about a specified custom model. In addition to metadata such
|
475
581
|
# as the name and description of the custom model, the output includes the words and
|
476
|
-
# their translations
|
477
|
-
# use the **List custom
|
582
|
+
# their translations that are defined for the model, as well as any prompts that are
|
583
|
+
# defined for the model. To see just the metadata for a model, use the **List custom
|
584
|
+
# models** method.
|
478
585
|
#
|
479
586
|
# **See also:** [Querying a custom
|
480
587
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQuery).
|
@@ -666,9 +773,9 @@ module IBMWatson
|
|
666
773
|
# @param word [String] The word that is to be added or updated for the custom model.
|
667
774
|
# @param translation [String] The phonetic or sounds-like translation for the word. A phonetic translation is
|
668
775
|
# based on the SSML format for representing the phonetic string of a word either as
|
669
|
-
# an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch,
|
670
|
-
# Korean languages support only IPA. A sounds-like is one or
|
671
|
-
# combined, sound like the word.
|
776
|
+
# an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch,
|
777
|
+
# Australian English, and Korean languages support only IPA. A sounds-like is one or
|
778
|
+
# more words that, when combined, sound like the word.
|
672
779
|
# @param part_of_speech [String] **Japanese only.** The part of speech for the word. The service uses the value to
|
673
780
|
# produce the correct intonation for the word. You can create only a single entry,
|
674
781
|
# with or without a single part of speech, for any word; you cannot create multiple
|
@@ -772,6 +879,481 @@ module IBMWatson
|
|
772
879
|
nil
|
773
880
|
end
|
774
881
|
#########################
|
882
|
+
# Custom prompts
|
883
|
+
#########################
|
884
|
+
|
885
|
+
##
|
886
|
+
# @!method list_custom_prompts(customization_id:)
|
887
|
+
# List custom prompts.
|
888
|
+
# Lists information about all custom prompts that are defined for a custom model.
|
889
|
+
# The information includes the prompt ID, prompt text, status, and optional speaker
|
890
|
+
# ID for each prompt of the custom model. You must use credentials for the instance
|
891
|
+
# of the service that owns the custom model. The same information about all of the
|
892
|
+
# prompts for a custom model is also provided by the **Get a custom model** method.
|
893
|
+
# That method provides complete details about a specified custom model, including
|
894
|
+
# its language, owner, custom words, and more.
|
895
|
+
#
|
896
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
897
|
+
# with US English custom models and voices.
|
898
|
+
#
|
899
|
+
# **See also:** [Listing custom
|
900
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
|
901
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
902
|
+
# credentials for the instance of the service that owns the custom model.
|
903
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
904
|
+
def list_custom_prompts(customization_id:)
|
905
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
906
|
+
|
907
|
+
headers = {
|
908
|
+
}
|
909
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "list_custom_prompts")
|
910
|
+
headers.merge!(sdk_headers)
|
911
|
+
|
912
|
+
method_url = "/v1/customizations/%s/prompts" % [ERB::Util.url_encode(customization_id)]
|
913
|
+
|
914
|
+
response = request(
|
915
|
+
method: "GET",
|
916
|
+
url: method_url,
|
917
|
+
headers: headers,
|
918
|
+
accept_json: true
|
919
|
+
)
|
920
|
+
response
|
921
|
+
end
|
922
|
+
|
923
|
+
##
|
924
|
+
# @!method add_custom_prompt(customization_id:, prompt_id:, metadata:, file:, filename: nil)
|
925
|
+
# Add a custom prompt.
|
926
|
+
# Adds a custom prompt to a custom model. A prompt is defined by the text that is to
|
927
|
+
# be spoken, the audio for that text, a unique user-specified ID for the prompt, and
|
928
|
+
# an optional speaker ID. The information is used to generate prosodic data that is
|
929
|
+
# not visible to the user. This data is used by the service to produce the
|
930
|
+
# synthesized audio upon request. You must use credentials for the instance of the
|
931
|
+
# service that owns a custom model to add a prompt to it. You can add a maximum of
|
932
|
+
# 1000 custom prompts to a single custom model.
|
933
|
+
#
|
934
|
+
# It is recommended that you assign meaningful values to prompt IDs. For example, use
|
935
|
+
# `goodbye` to identify a prompt that speaks a farewell message. Prompt IDs must be
|
936
|
+
# unique within a given custom model. You cannot define two prompts with the same
|
937
|
+
# name for the same custom model. If you provide the ID of an existing prompt, the
|
938
|
+
# previously uploaded prompt is replaced by the new information. The existing prompt
|
939
|
+
# is reprocessed by using the new text and audio and, if provided, new speaker
|
940
|
+
# model, and the prosody data associated with the prompt is updated.
|
941
|
+
#
|
942
|
+
# The quality of a prompt is undefined if the language of a prompt does not match
|
943
|
+
# the language of its custom model. This is consistent with any text or SSML that is
|
944
|
+
# specified for a speech synthesis request. The service makes a best-effort attempt
|
945
|
+
# to render the specified text for the prompt; it does not validate that the
|
946
|
+
# language of the text matches the language of the model.
|
947
|
+
#
|
948
|
+
# Adding a prompt is an asynchronous operation. Although it accepts less audio than
|
949
|
+
# speaker enrollment, the service must align the audio with the provided text. The
|
950
|
+
# time that it takes to process a prompt depends on the prompt itself. The
|
951
|
+
# processing time for a reasonably sized prompt generally matches the length of the
|
952
|
+
# audio (for example, it takes 20 seconds to process a 20-second prompt).
|
953
|
+
#
|
954
|
+
# For shorter prompts, you can wait for a reasonable amount of time and then check
|
955
|
+
# the status of the prompt with the **Get a custom prompt** method. For longer
|
956
|
+
# prompts, consider using that method to poll the service every few seconds to
|
957
|
+
# determine when the prompt becomes available. No prompt can be used for speech
|
958
|
+
# synthesis if it is in the `processing` or `failed` state. Only prompts that are in
|
959
|
+
# the `available` state can be used for speech synthesis.
|
960
|
+
#
|
961
|
+
# When it processes a request, the service attempts to align the text and the audio
|
962
|
+
# that are provided for the prompt. The text that is passed with a prompt must match
|
963
|
+
# the spoken audio as closely as possible. Optimally, the text and audio match
|
964
|
+
# exactly. The service does its best to align the specified text with the audio, and
|
965
|
+
# it can often compensate for mismatches between the two. But if the service cannot
|
966
|
+
# effectively align the text and the audio, possibly because the magnitude of
|
967
|
+
# mismatches between the two is too great, processing of the prompt fails.
|
968
|
+
#
|
969
|
+
# ### Evaluating a prompt
|
970
|
+
#
|
971
|
+
# Always listen to and evaluate a prompt to determine its quality before using it
|
972
|
+
# in production. To evaluate a prompt, include only the single prompt in a speech
|
973
|
+
# synthesis request by using the following SSML extension, in this case for a prompt
|
974
|
+
# whose ID is `goodbye`:
|
975
|
+
#
|
976
|
+
# `<ibm:prompt id="goodbye"/>`
|
977
|
+
#
|
978
|
+
# In some cases, you might need to rerecord and resubmit a prompt as many as five
|
979
|
+
# times to address the following possible problems:
|
980
|
+
# * The service might fail to detect a mismatch between the prompt's text and audio.
|
981
|
+
# The longer the prompt, the greater the chance for misalignment between its text
|
982
|
+
# and audio. Therefore, multiple shorter prompts are preferable to a single long
|
983
|
+
# prompt.
|
984
|
+
# * The text of a prompt might include a word that the service does not recognize.
|
985
|
+
# In this case, you can create a custom word and pronunciation pair to tell the
|
986
|
+
# service how to pronounce the word. You must then re-create the prompt.
|
987
|
+
# * The quality of the input audio might be insufficient or the service's processing
|
988
|
+
# of the audio might fail to detect the intended prosody. Submitting new audio for
|
989
|
+
# the prompt can correct these issues.
|
990
|
+
#
|
991
|
+
# If a prompt that is created without a speaker ID does not adequately reflect the
|
992
|
+
# intended prosody, enrolling the speaker and providing a speaker ID for the prompt
|
993
|
+
# is one recommended means of potentially improving the quality of the prompt. This
|
994
|
+
# is especially important for shorter prompts such as "good-bye" or "thank you,"
|
995
|
+
# where less audio data makes it more difficult to match the prosody of the speaker.
|
996
|
+
#
|
997
|
+
#
|
998
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
999
|
+
# with US English custom models and voices.
|
1000
|
+
#
|
1001
|
+
# **See also:**
|
1002
|
+
# * [Add a custom
|
1003
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-add-prompt)
|
1004
|
+
# * [Evaluate a custom
|
1005
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-evaluate-prompt)
|
1006
|
+
# * [Rules for creating custom
|
1007
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-prompts).
|
1008
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1009
|
+
# credentials for the instance of the service that owns the custom model.
|
1010
|
+
# @param prompt_id [String] The identifier of the prompt that is to be added to the custom model:
|
1011
|
+
# * Include a maximum of 49 characters in the ID.
|
1012
|
+
# * Include only alphanumeric characters and `_` (underscores) in the ID.
|
1013
|
+
# * Do not include XML sensitive characters (double quotes, single quotes,
|
1014
|
+
# ampersands, angle brackets, and slashes) in the ID.
|
1015
|
+
# * To add a new prompt, the ID must be unique for the specified custom model.
|
1016
|
+
# Otherwise, the new information for the prompt overwrites the existing prompt that
|
1017
|
+
# has that ID.
|
1018
|
+
# @param metadata [PromptMetadata] Information about the prompt that is to be added to a custom model. The following
|
1019
|
+
# example of a `PromptMetadata` object includes both the required prompt text and an
|
1020
|
+
# optional speaker model ID:
|
1021
|
+
#
|
1022
|
+
# `{ "prompt_text": "Thank you and good-bye!", "speaker_id":
|
1023
|
+
# "823068b2-ed4e-11ea-b6e0-7b6456aa95cc" }`.
|
1024
|
+
# @param file [File] An audio file that speaks the text of the prompt with intonation and prosody that
|
1025
|
+
# matches how you would like the prompt to be spoken.
|
1026
|
+
# * The prompt audio must be in WAV format and must have a minimum sampling rate of
|
1027
|
+
# 16 kHz. The service accepts audio with higher sampling rates. The service
|
1028
|
+
# transcodes all audio to 16 kHz before processing it.
|
1029
|
+
# * The length of the prompt audio is limited to 30 seconds.
|
1030
|
+
# @param filename [String] The filename for file.
|
1031
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1032
|
+
def add_custom_prompt(customization_id:, prompt_id:, metadata:, file:, filename: nil)
|
1033
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
1034
|
+
|
1035
|
+
raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?
|
1036
|
+
|
1037
|
+
raise ArgumentError.new("metadata must be provided") if metadata.nil?
|
1038
|
+
|
1039
|
+
raise ArgumentError.new("file must be provided") if file.nil?
|
1040
|
+
|
1041
|
+
headers = {
|
1042
|
+
}
|
1043
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "add_custom_prompt")
|
1044
|
+
headers.merge!(sdk_headers)
|
1045
|
+
|
1046
|
+
form_data = {}
|
1047
|
+
|
1048
|
+
form_data[:metadata] = HTTP::FormData::Part.new(metadata.to_s, content_type: "application/json")
|
1049
|
+
|
1050
|
+
unless file.instance_of?(StringIO) || file.instance_of?(File)
|
1051
|
+
file = file.respond_to?(:to_json) ? StringIO.new(file.to_json) : StringIO.new(file)
|
1052
|
+
end
|
1053
|
+
filename = file.path if filename.nil? && file.respond_to?(:path)
|
1054
|
+
form_data[:file] = HTTP::FormData::File.new(file, content_type: "audio/wav", filename: filename)
|
1055
|
+
|
1056
|
+
method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]
|
1057
|
+
|
1058
|
+
response = request(
|
1059
|
+
method: "POST",
|
1060
|
+
url: method_url,
|
1061
|
+
headers: headers,
|
1062
|
+
form: form_data,
|
1063
|
+
accept_json: true
|
1064
|
+
)
|
1065
|
+
response
|
1066
|
+
end
|
1067
|
+
|
1068
|
+
##
|
1069
|
+
# @!method get_custom_prompt(customization_id:, prompt_id:)
|
1070
|
+
# Get a custom prompt.
|
1071
|
+
# Gets information about a specified custom prompt for a specified custom model. The
|
1072
|
+
# information includes the prompt ID, prompt text, status, and optional speaker ID
|
1073
|
+
# for the specified prompt. You must use credentials for the instance of
|
1074
|
+
# the service that owns the custom model.
|
1075
|
+
#
|
1076
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
1077
|
+
# with US English custom models and voices.
|
1078
|
+
#
|
1079
|
+
# **See also:** [Listing custom
|
1080
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
|
1081
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1082
|
+
# credentials for the instance of the service that owns the custom model.
|
1083
|
+
# @param prompt_id [String] The identifier (name) of the prompt.
|
1084
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1085
|
+
def get_custom_prompt(customization_id:, prompt_id:)
|
1086
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
1087
|
+
|
1088
|
+
raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?
|
1089
|
+
|
1090
|
+
headers = {
|
1091
|
+
}
|
1092
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "get_custom_prompt")
|
1093
|
+
headers.merge!(sdk_headers)
|
1094
|
+
|
1095
|
+
method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]
|
1096
|
+
|
1097
|
+
response = request(
|
1098
|
+
method: "GET",
|
1099
|
+
url: method_url,
|
1100
|
+
headers: headers,
|
1101
|
+
accept_json: true
|
1102
|
+
)
|
1103
|
+
response
|
1104
|
+
end
|
1105
|
+
|
1106
|
+
##
|
1107
|
+
# @!method delete_custom_prompt(customization_id:, prompt_id:)
|
1108
|
+
# Delete a custom prompt.
|
1109
|
+
# Deletes an existing custom prompt from a custom model. The service deletes the
|
1110
|
+
# prompt with the specified ID. You must use credentials for the instance of the
|
1111
|
+
# service that owns the custom model from which the prompt is to be deleted.
|
1112
|
+
#
|
1113
|
+
# **Caution:** Deleting a custom prompt elicits a 400 response code from synthesis
|
1114
|
+
# requests that attempt to use the prompt. Make sure that you do not attempt to use
|
1115
|
+
# a deleted prompt in a production application.
|
1116
|
+
#
|
1117
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
1118
|
+
# with US English custom models and voices.
|
1119
|
+
#
|
1120
|
+
# **See also:** [Deleting a custom
|
1121
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-delete).
|
1122
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1123
|
+
# credentials for the instance of the service that owns the custom model.
|
1124
|
+
# @param prompt_id [String] The identifier (name) of the prompt that is to be deleted.
|
1125
|
+
# @return [nil]
|
1126
|
+
def delete_custom_prompt(customization_id:, prompt_id:)
  # Both path parameters are mandatory; reject nil before anything is built.
  raise ArgumentError, "customization_id must be provided" if customization_id.nil?
  raise ArgumentError, "prompt_id must be provided" if prompt_id.nil?

  # Start from an empty header set and fold in the SDK headers for this operation.
  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "delete_custom_prompt"))

  # Each ID is URL-encoded before being substituted into the path.
  method_url = "/v1/customizations/%s/prompts/%s" % [
    ERB::Util.url_encode(customization_id),
    ERB::Util.url_encode(prompt_id)
  ]

  request(
    method: "DELETE",
    url: method_url,
    headers: headers,
    accept_json: false
  )

  # The result of the request is discarded; this method always returns nil.
  nil
end
|
1146
|
+
#########################
|
1147
|
+
# Speaker models
|
1148
|
+
#########################
|
1149
|
+
|
1150
|
+
##
|
1151
|
+
# @!method list_speaker_models
|
1152
|
+
# List speaker models.
|
1153
|
+
# Lists information about all speaker models that are defined for a service
|
1154
|
+
# instance. The information includes the speaker ID and speaker name of each defined
|
1155
|
+
# speaker. You must use credentials for the instance of a service to list its
|
1156
|
+
# speakers.
|
1157
|
+
#
|
1158
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1159
|
+
# functionality that is supported only for use with US English custom models and
|
1160
|
+
# voices.
|
1161
|
+
#
|
1162
|
+
# **See also:** [Listing speaker
|
1163
|
+
# models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list).
|
1164
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1165
|
+
def list_speaker_models
  # Start from an empty header set and fold in the SDK headers for this operation.
  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "list_speaker_models"))

  # The value returned by `request` is handed back to the caller unchanged.
  request(
    method: "GET",
    url: "/v1/speakers",
    headers: headers,
    accept_json: true
  )
end
|
1181
|
+
|
1182
|
+
##
|
1183
|
+
# @!method create_speaker_model(speaker_name:, audio:)
|
1184
|
+
# Create a speaker model.
|
1185
|
+
# Creates a new speaker model, which is an optional enrollment token for users who
|
1186
|
+
# are to add prompts to custom models. A speaker model contains information about a
|
1187
|
+
# user's voice. The service extracts this information from a WAV audio sample that
|
1188
|
+
# you pass as the body of the request. Associating a speaker model with a prompt is
|
1189
|
+
# optional, but the information that is extracted from the speaker model helps the
|
1190
|
+
# service learn about the speaker's voice.
|
1191
|
+
#
|
1192
|
+
# A speaker model can make an appreciable difference in the quality of prompts,
|
1193
|
+
# especially short prompts with relatively little audio, that are associated with
|
1194
|
+
# that speaker. A speaker model can help the service produce a prompt with more
|
1195
|
+
# confidence; the lack of a speaker model can potentially compromise the quality of
|
1196
|
+
# a prompt.
|
1197
|
+
#
|
1198
|
+
# The gender of the speaker who creates a speaker model does not need to match the
|
1199
|
+
# gender of a voice that is used with prompts that are associated with that speaker
|
1200
|
+
# model. For example, a speaker model that is created by a male speaker can be
|
1201
|
+
# associated with prompts that are spoken by female voices.
|
1202
|
+
#
|
1203
|
+
# You create a speaker model for a given instance of the service. The new speaker
|
1204
|
+
# model is owned by the service instance whose credentials are used to create it.
|
1205
|
+
# That same speaker can then be used to create prompts for all custom models within
|
1206
|
+
# that service instance. No language is associated with a speaker model, but each
|
1207
|
+
# custom model has a single specified language. You can add prompts only to US
|
1208
|
+
# English models.
|
1209
|
+
#
|
1210
|
+
# You specify a name for the speaker when you create it. The name must be unique
|
1211
|
+
# among all speaker names for the owning service instance. To re-create a speaker
|
1212
|
+
# model for an existing speaker name, you must first delete the existing speaker
|
1213
|
+
# model that has that name.
|
1214
|
+
#
|
1215
|
+
# Speaker enrollment is a synchronous operation. Although it accepts more audio data
|
1216
|
+
# than a prompt, the process of adding a speaker is very fast. The service simply
|
1217
|
+
# extracts information about the speaker's voice from the audio. Unlike prompts,
|
1218
|
+
# speaker models neither need nor accept a transcription of the audio. When the call
|
1219
|
+
# returns, the audio is fully processed and the speaker enrollment is complete.
|
1220
|
+
#
|
1221
|
+
# The service returns a speaker ID with the request. A speaker ID is a globally unique
|
1222
|
+
# identifier (GUID) that you use to identify the speaker in subsequent requests to
|
1223
|
+
# the service.
|
1224
|
+
#
|
1225
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1226
|
+
# functionality that is supported only for use with US English custom models and
|
1227
|
+
# voices.
|
1228
|
+
#
|
1229
|
+
# **See also:**
|
1230
|
+
# * [Create a speaker
|
1231
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-speaker-model)
|
1232
|
+
# * [Rules for creating speaker
|
1233
|
+
# models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-speakers).
|
1234
|
+
# @param speaker_name [String] The name of the speaker that is to be added to the service instance.
|
1235
|
+
# * Include a maximum of 49 characters in the name.
|
1236
|
+
# * Include only alphanumeric characters and `_` (underscores) in the name.
|
1237
|
+
# * Do not include XML sensitive characters (double quotes, single quotes,
|
1238
|
+
# ampersands, angle brackets, and slashes) in the name.
|
1239
|
+
# * Do not use the name of an existing speaker that is already defined for the
|
1240
|
+
# service instance.
|
1241
|
+
# @param audio [File] An enrollment audio file that contains a sample of the speaker's voice.
|
1242
|
+
# * The enrollment audio must be in WAV format and must have a minimum sampling rate
|
1243
|
+
# of 16 kHz. The service accepts audio with higher sampling rates. It transcodes all
|
1244
|
+
# audio to 16 kHz before processing it.
|
1245
|
+
# * The length of the enrollment audio is limited to 1 minute. Speaking one or two
|
1246
|
+
# paragraphs of text that include five to ten sentences is recommended.
|
1247
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1248
|
+
def create_speaker_model(speaker_name:, audio:)
  # Both arguments are mandatory; reject nil before anything is built.
  raise ArgumentError, "speaker_name must be provided" if speaker_name.nil?
  raise ArgumentError, "audio must be provided" if audio.nil?

  # Start from an empty header set and fold in the SDK headers for this operation.
  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "create_speaker_model"))
  # Set after the merge so that it replaces any Content-Type the SDK headers carried.
  headers["Content-Type"] = "audio/wav"

  # The speaker name goes in `params`; the audio itself is passed as `data`.
  params = { "speaker_name" => speaker_name }

  # The value returned by `request` is handed back to the caller unchanged.
  request(
    method: "POST",
    url: "/v1/speakers",
    headers: headers,
    params: params,
    data: audio,
    accept_json: true
  )
end
|
1277
|
+
|
1278
|
+
##
|
1279
|
+
# @!method get_speaker_model(speaker_id:)
|
1280
|
+
# Get a speaker model.
|
1281
|
+
# Gets information about all prompts that are defined by a specified speaker for all
|
1282
|
+
# custom models that are owned by a service instance. The information is grouped by
|
1283
|
+
# the customization IDs of the custom models. For each custom model, the information
|
1284
|
+
# lists information about each prompt that is defined for that custom model by the
|
1285
|
+
# speaker. You must use credentials for the instance of the service that owns a
|
1286
|
+
# speaker model to list its prompts.
|
1287
|
+
#
|
1288
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1289
|
+
# functionality that is supported only for use with US English custom models and
|
1290
|
+
# voices.
|
1291
|
+
#
|
1292
|
+
# **See also:** [Listing the custom prompts for a speaker
|
1293
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list-prompts).
|
1294
|
+
# @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
|
1295
|
+
# credentials for the instance of the service that owns the speaker model.
|
1296
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1297
|
+
def get_speaker_model(speaker_id:)
  # The speaker ID is mandatory; reject nil before anything is built.
  raise ArgumentError, "speaker_id must be provided" if speaker_id.nil?

  # Start from an empty header set and fold in the SDK headers for this operation.
  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "get_speaker_model"))

  # The ID is URL-encoded before being substituted into the path.
  method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)]

  # The value returned by `request` is handed back to the caller unchanged.
  request(
    method: "GET",
    url: method_url,
    headers: headers,
    accept_json: true
  )
end
|
1315
|
+
|
1316
|
+
##
|
1317
|
+
# @!method delete_speaker_model(speaker_id:)
|
1318
|
+
# Delete a speaker model.
|
1319
|
+
# Deletes an existing speaker model from the service instance. The service deletes
|
1320
|
+
# the enrolled speaker with the specified speaker ID. You must use credentials for
|
1321
|
+
# the instance of the service that owns a speaker model to delete the speaker.
|
1322
|
+
#
|
1323
|
+
# Any prompts that are associated with the deleted speaker are not affected by the
|
1324
|
+
# speaker's deletion. The prosodic data that defines the quality of a prompt is
|
1325
|
+
# established when the prompt is created. A prompt is static and remains unaffected
|
1326
|
+
# by deletion of its associated speaker. However, the prompt cannot be resubmitted
|
1327
|
+
# or updated with its original speaker once that speaker is deleted.
|
1328
|
+
#
|
1329
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1330
|
+
# functionality that is supported only for use with US English custom models and
|
1331
|
+
# voices.
|
1332
|
+
#
|
1333
|
+
# **See also:** [Deleting a speaker
|
1334
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-delete).
|
1335
|
+
# @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
|
1336
|
+
# credentials for the instance of the service that owns the speaker model.
|
1337
|
+
# @return [nil]
|
1338
|
+
def delete_speaker_model(speaker_id:)
  # The speaker ID is mandatory; reject nil before anything is built.
  raise ArgumentError, "speaker_id must be provided" if speaker_id.nil?

  # Start from an empty header set and fold in the SDK headers for this operation.
  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "delete_speaker_model"))

  # The ID is URL-encoded before being substituted into the path.
  method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)]

  request(
    method: "DELETE",
    url: method_url,
    headers: headers,
    accept_json: false
  )

  # The result of the request is discarded; this method always returns nil.
  nil
end
|
1356
|
+
#########################
|
775
1357
|
# User data
|
776
1358
|
#########################
|
777
1359
|
|