ibm_watson 2.0.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -29
- data/lib/ibm_watson/assistant_v1.rb +111 -77
- data/lib/ibm_watson/assistant_v2.rb +83 -59
- data/lib/ibm_watson/compare_comply_v1.rb +11 -4
- data/lib/ibm_watson/discovery_v1.rb +2 -3
- data/lib/ibm_watson/discovery_v2.rb +97 -7
- data/lib/ibm_watson/language_translator_v3.rb +1 -2
- data/lib/ibm_watson/natural_language_classifier_v1.rb +9 -3
- data/lib/ibm_watson/natural_language_understanding_v1.rb +692 -3
- data/lib/ibm_watson/personality_insights_v3.rb +13 -11
- data/lib/ibm_watson/speech_to_text_v1.rb +257 -106
- data/lib/ibm_watson/text_to_speech_v1.rb +599 -19
- data/lib/ibm_watson/tone_analyzer_v3.rb +1 -2
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +1 -2
- data/lib/ibm_watson/visual_recognition_v4.rb +11 -8
- data/test/integration/test_discovery_v2.rb +15 -0
- data/test/integration/test_natural_language_understanding_v1.rb +134 -1
- data/test/integration/test_text_to_speech_v1.rb +57 -0
- data/test/unit/test_discovery_v2.rb +29 -0
- data/test/unit/test_natural_language_understanding_v1.rb +231 -0
- data/test/unit/test_text_to_speech_v1.rb +145 -0
- metadata +7 -7
@@ -14,7 +14,7 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
#
|
17
|
-
# IBM OpenAPI SDK Code Generator Version: 3.
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
|
18
18
|
#
|
19
19
|
# The IBM Watson™ Text to Speech service provides APIs that use IBM's
|
20
20
|
# speech-synthesis capabilities to synthesize text into natural-sounding speech in a
|
@@ -33,8 +33,12 @@
|
|
33
33
|
# that, when combined, sound like the word. A phonetic translation is based on the SSML
|
34
34
|
# phoneme format for representing a word. You can specify a phonetic translation in
|
35
35
|
# standard International Phonetic Alphabet (IPA) representation or in the proprietary IBM
|
36
|
-
# Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch,
|
37
|
-
# support only IPA.
|
36
|
+
# Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch, Australian English,
|
37
|
+
# and Korean languages support only IPA.
|
38
|
+
#
|
39
|
+
# The service also offers a Tune by Example feature that lets you define custom prompts.
|
40
|
+
# You can also define speaker models to improve the quality of your custom prompts. The
|
41
|
+
# service supports custom prompts only for US English custom models and voices.
|
38
42
|
|
39
43
|
require "concurrent"
|
40
44
|
require "erb"
|
@@ -42,7 +46,6 @@ require "json"
|
|
42
46
|
require "ibm_cloud_sdk_core"
|
43
47
|
require_relative "./common.rb"
|
44
48
|
|
45
|
-
# Module for the Watson APIs
|
46
49
|
module IBMWatson
|
47
50
|
##
|
48
51
|
# The Text to Speech V1 service.
|
@@ -117,7 +120,33 @@ module IBMWatson
|
|
117
120
|
#
|
118
121
|
# **See also:** [Listing a specific
|
119
122
|
# voice](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices#listVoice).
|
120
|
-
#
|
123
|
+
#
|
124
|
+
#
|
125
|
+
# ### Important voice updates
|
126
|
+
#
|
127
|
+
# The service's voices underwent significant change on 2 December 2020.
|
128
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
129
|
+
# instead of concatenative.
|
130
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
|
131
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
132
|
+
# `ar-MS` identifier instead.
|
133
|
+
# * The standard concatenative voices for the following languages are now
|
134
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
135
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
136
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
137
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
138
|
+
# of the service's neural voices.
|
139
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
140
|
+
# for production use.
|
141
|
+
#
|
142
|
+
# The deprecated voices and features will continue to function for at least one year
|
143
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
144
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
145
|
+
# all voice updates, see the [2 December 2020 service
|
146
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
147
|
+
# in the release notes.
|
148
|
+
# @param voice [String] The voice for which information is to be returned. For more information about
|
149
|
+
# specifying a voice, see **Important voice updates** in the method description.
|
121
150
|
# @param customization_id [String] The customization ID (GUID) of a custom model for which information is to be
|
122
151
|
# returned. You must make the request with credentials for the instance of the
|
123
152
|
# service that owns the custom model. Omit the parameter to see information about
|
@@ -213,6 +242,30 @@ module IBMWatson
|
|
213
242
|
# formats](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-audioFormats#audioFormats).
|
214
243
|
#
|
215
244
|
#
|
245
|
+
# ### Important voice updates
|
246
|
+
#
|
247
|
+
# The service's voices underwent significant change on 2 December 2020.
|
248
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
249
|
+
# instead of concatenative.
|
250
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
|
251
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
252
|
+
# `ar-MS` identifier instead.
|
253
|
+
# * The standard concatenative voices for the following languages are now
|
254
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
255
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
256
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
257
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
258
|
+
# of the service's neural voices.
|
259
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
260
|
+
# for production use.
|
261
|
+
#
|
262
|
+
# The deprecated voices and features will continue to function for at least one year
|
263
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
264
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
265
|
+
# all voice updates, see the [2 December 2020 service
|
266
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
267
|
+
# in the release notes.
|
268
|
+
#
|
216
269
|
# ### Warning messages
|
217
270
|
#
|
218
271
|
# If a request includes invalid query parameters, the service returns a `Warnings`
|
@@ -226,7 +279,8 @@ module IBMWatson
|
|
226
279
|
# the `accept` parameter to specify the audio format. For more information about
|
227
280
|
# specifying an audio format, see **Audio formats (accept types)** in the method
|
228
281
|
# description.
|
229
|
-
# @param voice [String] The voice to use for synthesis.
|
282
|
+
# @param voice [String] The voice to use for synthesis. For more information about specifying a voice, see
|
283
|
+
# **Important voice updates** in the method description.
|
230
284
|
# @param customization_id [String] The customization ID (GUID) of a custom model to use for the synthesis. If a
|
231
285
|
# custom model is specified, it works only if it matches the language of the
|
232
286
|
# indicated voice. You must make the request with credentials for the instance of
|
@@ -277,13 +331,39 @@ module IBMWatson
|
|
277
331
|
#
|
278
332
|
# **See also:** [Querying a word from a
|
279
333
|
# language](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsQueryLanguage).
|
334
|
+
#
|
335
|
+
#
|
336
|
+
# ### Important voice updates
|
337
|
+
#
|
338
|
+
# The service's voices underwent significant change on 2 December 2020.
|
339
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
340
|
+
# instead of concatenative.
|
341
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
|
342
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
343
|
+
# `ar-MS` identifier instead.
|
344
|
+
# * The standard concatenative voices for the following languages are now
|
345
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
346
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
347
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
348
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
349
|
+
# of the service's neural voices.
|
350
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
351
|
+
# for production use.
|
352
|
+
#
|
353
|
+
# The deprecated voices and features will continue to function for at least one year
|
354
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
355
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
356
|
+
# all voice updates, see the [2 December 2020 service
|
357
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
358
|
+
# in the release notes.
|
280
359
|
# @param text [String] The word for which the pronunciation is requested.
|
281
360
|
# @param voice [String] A voice that specifies the language in which the pronunciation is to be returned.
|
282
361
|
# All voices for the same language (for example, `en-US`) return the same
|
283
|
-
# translation.
|
362
|
+
# translation. For more information about specifying a voice, see **Important voice
|
363
|
+
# updates** in the method description.
|
284
364
|
# @param format [String] The phoneme format in which to return the pronunciation. The Arabic, Chinese,
|
285
|
-
# Dutch, and Korean languages support only IPA. Omit the
|
286
|
-
# pronunciation in the default format.
|
365
|
+
# Dutch, Australian English, and Korean languages support only IPA. Omit the
|
366
|
+
# parameter to obtain the pronunciation in the default format.
|
287
367
|
# @param customization_id [String] The customization ID (GUID) of a custom model for which the pronunciation is to be
|
288
368
|
# returned. The language of a specified custom model must match the language of the
|
289
369
|
# specified voice. If the word is not defined in the specified custom model, the
|
@@ -332,11 +412,37 @@ module IBMWatson
|
|
332
412
|
#
|
333
413
|
# **See also:** [Creating a custom
|
334
414
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsCreate).
|
415
|
+
#
|
416
|
+
#
|
417
|
+
# ### Important voice updates
|
418
|
+
#
|
419
|
+
# The service's voices underwent significant change on 2 December 2020.
|
420
|
+
# * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
|
421
|
+
# instead of concatenative.
|
422
|
+
# * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
|
423
|
+
# * The `ar-AR` language identifier cannot be used to create a custom model. Use the
|
424
|
+
# `ar-MS` identifier instead.
|
425
|
+
# * The standard concatenative voices for the following languages are now
|
426
|
+
# deprecated: Brazilian Portuguese, United Kingdom and United States English,
|
427
|
+
# French, German, Italian, Japanese, and Spanish (all dialects).
|
428
|
+
# * The features expressive SSML, voice transformation SSML, and use of the `volume`
|
429
|
+
# attribute of the `<prosody>` element are deprecated and are not supported with any
|
430
|
+
# of the service's neural voices.
|
431
|
+
# * All of the service's voices are now customizable and generally available (GA)
|
432
|
+
# for production use.
|
433
|
+
#
|
434
|
+
# The deprecated voices and features will continue to function for at least one year
|
435
|
+
# but might be removed at a future date. You are encouraged to migrate to the
|
436
|
+
# equivalent neural voices at your earliest convenience. For more information about
|
437
|
+
# all voice updates, see the [2 December 2020 service
|
438
|
+
# update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
|
439
|
+
# in the release notes.
|
335
440
|
# @param name [String] The name of the new custom model.
|
336
441
|
# @param language [String] The language of the new custom model. You create a custom model for a specific
|
337
|
-
# language, not for a specific voice. A custom model can be used with any voice
|
338
|
-
#
|
339
|
-
#
|
442
|
+
# language, not for a specific voice. A custom model can be used with any voice for
|
443
|
+
# its specified language. Omit the parameter to use the default language,
|
444
|
+
# `en-US`. **Note:** The `ar-AR` language identifier cannot be used to create a
|
445
|
+
# custom model. Use the `ar-MS` identifier instead.
|
340
446
|
# @param description [String] A description of the new custom model. Specifying a description is recommended.
|
341
447
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
342
448
|
def create_custom_model(name:, language: nil, description: nil)
|
@@ -370,8 +476,8 @@ module IBMWatson
|
|
370
476
|
# List custom models.
|
371
477
|
# Lists metadata such as the name and description for all custom models that are
|
372
478
|
# owned by an instance of the service. Specify a language to list the custom models
|
373
|
-
# for that language only. To see the words in addition to the metadata
|
374
|
-
# specific custom model, use the **
|
479
|
+
# for that language only. To see the words and prompts in addition to the metadata
|
480
|
+
# for a specific custom model, use the **Get a custom model** method. You must use
|
375
481
|
# credentials for the instance of the service that owns a model to list information
|
376
482
|
# about it.
|
377
483
|
#
|
@@ -473,8 +579,9 @@ module IBMWatson
|
|
473
579
|
# Get a custom model.
|
474
580
|
# Gets all information about a specified custom model. In addition to metadata such
|
475
581
|
# as the name and description of the custom model, the output includes the words and
|
476
|
-
# their translations
|
477
|
-
# use the **List custom
|
582
|
+
# their translations that are defined for the model, as well as any prompts that are
|
583
|
+
# defined for the model. To see just the metadata for a model, use the **List custom
|
584
|
+
# models** method.
|
478
585
|
#
|
479
586
|
# **See also:** [Querying a custom
|
480
587
|
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQuery).
|
@@ -666,9 +773,9 @@ module IBMWatson
|
|
666
773
|
# @param word [String] The word that is to be added or updated for the custom model.
|
667
774
|
# @param translation [String] The phonetic or sounds-like translation for the word. A phonetic translation is
|
668
775
|
# based on the SSML format for representing the phonetic string of a word either as
|
669
|
-
# an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch,
|
670
|
-
# Korean languages support only IPA. A sounds-like is one or
|
671
|
-
# combined, sound like the word.
|
776
|
+
# an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch,
|
777
|
+
# Australian English, and Korean languages support only IPA. A sounds-like is one or
|
778
|
+
# more words that, when combined, sound like the word.
|
672
779
|
# @param part_of_speech [String] **Japanese only.** The part of speech for the word. The service uses the value to
|
673
780
|
# produce the correct intonation for the word. You can create only a single entry,
|
674
781
|
# with or without a single part of speech, for any word; you cannot create multiple
|
@@ -772,6 +879,479 @@ module IBMWatson
|
|
772
879
|
nil
|
773
880
|
end
|
774
881
|
#########################
|
882
|
+
# Custom prompts
|
883
|
+
#########################
|
884
|
+
|
885
|
+
##
|
886
|
+
# @!method list_custom_prompts(customization_id:)
|
887
|
+
# List custom prompts.
|
888
|
+
# Lists information about all custom prompts that are defined for a custom model.
|
889
|
+
# The information includes the prompt ID, prompt text, status, and optional speaker
|
890
|
+
# ID for each prompt of the custom model. You must use credentials for the instance
|
891
|
+
# of the service that owns the custom model. The same information about all of the
|
892
|
+
# prompts for a custom model is also provided by the **Get a custom model** method.
|
893
|
+
# That method provides complete details about a specified custom model, including
|
894
|
+
# its language, owner, custom words, and more.
|
895
|
+
#
|
896
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
897
|
+
# with US English custom models and voices.
|
898
|
+
#
|
899
|
+
# **See also:** [Listing custom
|
900
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
|
901
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
902
|
+
# credentials for the instance of the service that owns the custom model.
|
903
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
904
|
+
def list_custom_prompts(customization_id:)
|
905
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
906
|
+
|
907
|
+
headers = {
|
908
|
+
}
|
909
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "list_custom_prompts")
|
910
|
+
headers.merge!(sdk_headers)
|
911
|
+
|
912
|
+
method_url = "/v1/customizations/%s/prompts" % [ERB::Util.url_encode(customization_id)]
|
913
|
+
|
914
|
+
response = request(
|
915
|
+
method: "GET",
|
916
|
+
url: method_url,
|
917
|
+
headers: headers,
|
918
|
+
accept_json: true
|
919
|
+
)
|
920
|
+
response
|
921
|
+
end
|
922
|
+
|
923
|
+
##
|
924
|
+
# @!method add_custom_prompt(customization_id:, prompt_id:, metadata:, file:)
|
925
|
+
# Add a custom prompt.
|
926
|
+
# Adds a custom prompt to a custom model. A prompt is defined by the text that is to
|
927
|
+
# be spoken, the audio for that text, a unique user-specified ID for the prompt, and
|
928
|
+
# an optional speaker ID. The information is used to generate prosodic data that is
|
929
|
+
# not visible to the user. This data is used by the service to produce the
|
930
|
+
# synthesized audio upon request. You must use credentials for the instance of the
|
931
|
+
# service that owns a custom model to add a prompt to it. You can add a maximum of
|
932
|
+
# 1000 custom prompts to a single custom model.
|
933
|
+
#
|
934
|
+
# You are encouraged to assign meaningful values for prompt IDs. For example, use
|
935
|
+
# `goodbye` to identify a prompt that speaks a farewell message. Prompt IDs must be
|
936
|
+
# unique within a given custom model. You cannot define two prompts with the same
|
937
|
+
# name for the same custom model. If you provide the ID of an existing prompt, the
|
938
|
+
# previously uploaded prompt is replaced by the new information. The existing prompt
|
939
|
+
# is reprocessed by using the new text and audio and, if provided, new speaker
|
940
|
+
# model, and the prosody data associated with the prompt is updated.
|
941
|
+
#
|
942
|
+
# The quality of a prompt is undefined if the language of a prompt does not match
|
943
|
+
# the language of its custom model. This is consistent with any text or SSML that is
|
944
|
+
# specified for a speech synthesis request. The service makes a best-effort attempt
|
945
|
+
# to render the specified text for the prompt; it does not validate that the
|
946
|
+
# language of the text matches the language of the model.
|
947
|
+
#
|
948
|
+
# Adding a prompt is an asynchronous operation. Although it accepts less audio than
|
949
|
+
# speaker enrollment, the service must align the audio with the provided text. The
|
950
|
+
# time that it takes to process a prompt depends on the prompt itself. The
|
951
|
+
# processing time for a reasonably sized prompt generally matches the length of the
|
952
|
+
# audio (for example, it takes 20 seconds to process a 20-second prompt).
|
953
|
+
#
|
954
|
+
# For shorter prompts, you can wait for a reasonable amount of time and then check
|
955
|
+
# the status of the prompt with the **Get a custom prompt** method. For longer
|
956
|
+
# prompts, consider using that method to poll the service every few seconds to
|
957
|
+
# determine when the prompt becomes available. No prompt can be used for speech
|
958
|
+
# synthesis if it is in the `processing` or `failed` state. Only prompts that are in
|
959
|
+
# the `available` state can be used for speech synthesis.
|
960
|
+
#
|
961
|
+
# When it processes a request, the service attempts to align the text and the audio
|
962
|
+
# that are provided for the prompt. The text that is passed with a prompt must match
|
963
|
+
# the spoken audio as closely as possible. Optimally, the text and audio match
|
964
|
+
# exactly. The service does its best to align the specified text with the audio, and
|
965
|
+
# it can often compensate for mismatches between the two. But if the service cannot
|
966
|
+
# effectively align the text and the audio, possibly because the magnitude of
|
967
|
+
# mismatches between the two is too great, processing of the prompt fails.
|
968
|
+
#
|
969
|
+
# ### Evaluating a prompt
|
970
|
+
#
|
971
|
+
# Always listen to and evaluate a prompt to determine its quality before using it
|
972
|
+
# in production. To evaluate a prompt, include only the single prompt in a speech
|
973
|
+
# synthesis request by using the following SSML extension, in this case for a prompt
|
974
|
+
# whose ID is `goodbye`:
|
975
|
+
#
|
976
|
+
# `<ibm:prompt id="goodbye"/>`
|
977
|
+
#
|
978
|
+
# In some cases, you might need to rerecord and resubmit a prompt as many as five
|
979
|
+
# times to address the following possible problems:
|
980
|
+
# * The service might fail to detect a mismatch between the prompt's text and audio.
|
981
|
+
# The longer the prompt, the greater the chance for misalignment between its text
|
982
|
+
# and audio. Therefore, multiple shorter prompts are preferable to a single long
|
983
|
+
# prompt.
|
984
|
+
# * The text of a prompt might include a word that the service does not recognize.
|
985
|
+
# In this case, you can create a custom word and pronunciation pair to tell the
|
986
|
+
# service how to pronounce the word. You must then re-create the prompt.
|
987
|
+
# * The quality of the input audio might be insufficient or the service's processing
|
988
|
+
# of the audio might fail to detect the intended prosody. Submitting new audio for
|
989
|
+
# the prompt can correct these issues.
|
990
|
+
#
|
991
|
+
# If a prompt that is created without a speaker ID does not adequately reflect the
|
992
|
+
# intended prosody, enrolling the speaker and providing a speaker ID for the prompt
|
993
|
+
# is one recommended means of potentially improving the quality of the prompt. This
|
994
|
+
# is especially important for shorter prompts such as "good-bye" or "thank you,"
|
995
|
+
# where less audio data makes it more difficult to match the prosody of the speaker.
|
996
|
+
#
|
997
|
+
#
|
998
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
999
|
+
# with US English custom models and voices.
|
1000
|
+
#
|
1001
|
+
# **See also:**
|
1002
|
+
# * [Add a custom
|
1003
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-add-prompt)
|
1004
|
+
# * [Evaluate a custom
|
1005
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-evaluate-prompt)
|
1006
|
+
# * [Rules for creating custom
|
1007
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-prompts).
|
1008
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1009
|
+
# credentials for the instance of the service that owns the custom model.
|
1010
|
+
# @param prompt_id [String] The identifier of the prompt that is to be added to the custom model:
|
1011
|
+
# * Include a maximum of 49 characters in the ID.
|
1012
|
+
# * Include only alphanumeric characters and `_` (underscores) in the ID.
|
1013
|
+
# * Do not include XML sensitive characters (double quotes, single quotes,
|
1014
|
+
# ampersands, angle brackets, and slashes) in the ID.
|
1015
|
+
# * To add a new prompt, the ID must be unique for the specified custom model.
|
1016
|
+
# Otherwise, the new information for the prompt overwrites the existing prompt that
|
1017
|
+
# has that ID.
|
1018
|
+
# @param metadata [PromptMetadata] Information about the prompt that is to be added to a custom model. The following
|
1019
|
+
# example of a `PromptMetadata` object includes both the required prompt text and an
|
1020
|
+
# optional speaker model ID:
|
1021
|
+
#
|
1022
|
+
# `{ "prompt_text": "Thank you and good-bye!", "speaker_id":
|
1023
|
+
# "823068b2-ed4e-11ea-b6e0-7b6456aa95cc" }`.
|
1024
|
+
# @param file [File] An audio file that speaks the text of the prompt with intonation and prosody that
|
1025
|
+
# matches how you would like the prompt to be spoken.
|
1026
|
+
# * The prompt audio must be in WAV format and must have a minimum sampling rate of
|
1027
|
+
# 16 kHz. The service accepts audio with higher sampling rates. The service
|
1028
|
+
# transcodes all audio to 16 kHz before processing it.
|
1029
|
+
# * The length of the prompt audio is limited to 30 seconds.
|
1030
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1031
|
+
def add_custom_prompt(customization_id:, prompt_id:, metadata:, file:)
|
1032
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
1033
|
+
|
1034
|
+
raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?
|
1035
|
+
|
1036
|
+
raise ArgumentError.new("metadata must be provided") if metadata.nil?
|
1037
|
+
|
1038
|
+
raise ArgumentError.new("file must be provided") if file.nil?
|
1039
|
+
|
1040
|
+
headers = {
|
1041
|
+
}
|
1042
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "add_custom_prompt")
|
1043
|
+
headers.merge!(sdk_headers)
|
1044
|
+
|
1045
|
+
form_data = {}
|
1046
|
+
|
1047
|
+
form_data[:metadata] = HTTP::FormData::Part.new(metadata.to_s, content_type: "application/json")
|
1048
|
+
|
1049
|
+
unless file.instance_of?(StringIO) || file.instance_of?(File)
|
1050
|
+
file = file.respond_to?(:to_json) ? StringIO.new(file.to_json) : StringIO.new(file)
|
1051
|
+
end
|
1052
|
+
form_data[:file] = HTTP::FormData::File.new(file, content_type: "audio/wav", filename: file.respond_to?(:path) ? file.path : nil)
|
1053
|
+
|
1054
|
+
method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]
|
1055
|
+
|
1056
|
+
response = request(
|
1057
|
+
method: "POST",
|
1058
|
+
url: method_url,
|
1059
|
+
headers: headers,
|
1060
|
+
form: form_data,
|
1061
|
+
accept_json: true
|
1062
|
+
)
|
1063
|
+
response
|
1064
|
+
end
|
1065
|
+
|
1066
|
+
##
|
1067
|
+
# @!method get_custom_prompt(customization_id:, prompt_id:)
|
1068
|
+
# Get a custom prompt.
|
1069
|
+
# Gets information about a specified custom prompt for a specified custom model. The
|
1070
|
+
# information includes the prompt ID, prompt text, status, and optional speaker ID
|
1071
|
+
# of the prompt. You must use credentials for the instance of
|
1072
|
+
# the service that owns the custom model.
|
1073
|
+
#
|
1074
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
1075
|
+
# with US English custom models and voices.
|
1076
|
+
#
|
1077
|
+
# **See also:** [Listing custom
|
1078
|
+
# prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
|
1079
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1080
|
+
# credentials for the instance of the service that owns the custom model.
|
1081
|
+
# @param prompt_id [String] The identifier (name) of the prompt.
|
1082
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1083
|
+
def get_custom_prompt(customization_id:, prompt_id:)
|
1084
|
+
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
1085
|
+
|
1086
|
+
raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?
|
1087
|
+
|
1088
|
+
headers = {
|
1089
|
+
}
|
1090
|
+
sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "get_custom_prompt")
|
1091
|
+
headers.merge!(sdk_headers)
|
1092
|
+
|
1093
|
+
method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]
|
1094
|
+
|
1095
|
+
response = request(
|
1096
|
+
method: "GET",
|
1097
|
+
url: method_url,
|
1098
|
+
headers: headers,
|
1099
|
+
accept_json: true
|
1100
|
+
)
|
1101
|
+
response
|
1102
|
+
end
|
1103
|
+
|
1104
|
+
##
|
1105
|
+
# @!method delete_custom_prompt(customization_id:, prompt_id:)
|
1106
|
+
# Delete a custom prompt.
|
1107
|
+
# Deletes an existing custom prompt from a custom model. The service deletes the
|
1108
|
+
# prompt with the specified ID. You must use credentials for the instance of the
|
1109
|
+
# service that owns the custom model from which the prompt is to be deleted.
|
1110
|
+
#
|
1111
|
+
# **Caution:** Deleting a custom prompt elicits a 400 response code from synthesis
|
1112
|
+
# requests that attempt to use the prompt. Make sure that you do not attempt to use
|
1113
|
+
# a deleted prompt in a production application.
|
1114
|
+
#
|
1115
|
+
# **Beta:** Custom prompts are beta functionality that is supported only for use
|
1116
|
+
# with US English custom models and voices.
|
1117
|
+
#
|
1118
|
+
# **See also:** [Deleting a custom
|
1119
|
+
# prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-delete).
|
1120
|
+
# @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
|
1121
|
+
# credentials for the instance of the service that owns the custom model.
|
1122
|
+
# @param prompt_id [String] The identifier (name) of the prompt that is to be deleted.
|
1123
|
+
# @return [nil]
|
1124
|
+
def delete_custom_prompt(customization_id:, prompt_id:)
  # Both identifiers are substituted into the request path below, so nil is
  # rejected before any request is built.
  raise ArgumentError.new("customization_id must be provided") if customization_id.nil?

  raise ArgumentError.new("prompt_id must be provided") if prompt_id.nil?

  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "delete_custom_prompt"))

  # Each path segment is percent-encoded individually before substitution.
  method_url = "/v1/customizations/%s/prompts/%s" % [ERB::Util.url_encode(customization_id), ERB::Util.url_encode(prompt_id)]

  request(
    method: "DELETE",
    url: method_url,
    headers: headers,
    accept_json: false
  )
  # The result of the DELETE call is intentionally discarded; callers get nil.
  nil
end
|
1144
|
+
#########################
|
1145
|
+
# Speaker models
|
1146
|
+
#########################
|
1147
|
+
|
1148
|
+
##
|
1149
|
+
# @!method list_speaker_models
|
1150
|
+
# List speaker models.
|
1151
|
+
# Lists information about all speaker models that are defined for a service
|
1152
|
+
# instance. The information includes the speaker ID and speaker name of each defined
|
1153
|
+
# speaker. You must use credentials for the instance of a service to list its
|
1154
|
+
# speakers.
|
1155
|
+
#
|
1156
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1157
|
+
# functionality that is supported only for use with US English custom models and
|
1158
|
+
# voices.
|
1159
|
+
#
|
1160
|
+
# **See also:** [Listing speaker
|
1161
|
+
# models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list).
|
1162
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1163
|
+
def list_speaker_models
  # Start from an empty header set and add the SDK headers for this operation.
  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "list_speaker_models"))

  # The value returned by `request` is handed back to the caller unchanged.
  request(
    method: "GET",
    url: "/v1/speakers",
    headers: headers,
    accept_json: true
  )
end
|
1179
|
+
|
1180
|
+
##
|
1181
|
+
# @!method create_speaker_model(speaker_name:, audio:)
|
1182
|
+
# Create a speaker model.
|
1183
|
+
# Creates a new speaker model, which is an optional enrollment token for users who
|
1184
|
+
# are to add prompts to custom models. A speaker model contains information about a
|
1185
|
+
# user's voice. The service extracts this information from a WAV audio sample that
|
1186
|
+
# you pass as the body of the request. Associating a speaker model with a prompt is
|
1187
|
+
# optional, but the information that is extracted from the speaker model helps the
|
1188
|
+
# service learn about the speaker's voice.
|
1189
|
+
#
|
1190
|
+
# A speaker model can make an appreciable difference in the quality of prompts,
|
1191
|
+
# especially short prompts with relatively little audio, that are associated with
|
1192
|
+
# that speaker. A speaker model can help the service produce a prompt with more
|
1193
|
+
# confidence; the lack of a speaker model can potentially compromise the quality of
|
1194
|
+
# a prompt.
|
1195
|
+
#
|
1196
|
+
# The gender of the speaker who creates a speaker model does not need to match the
|
1197
|
+
# gender of a voice that is used with prompts that are associated with that speaker
|
1198
|
+
# model. For example, a speaker model that is created by a male speaker can be
|
1199
|
+
# associated with prompts that are spoken by female voices.
|
1200
|
+
#
|
1201
|
+
# You create a speaker model for a given instance of the service. The new speaker
|
1202
|
+
# model is owned by the service instance whose credentials are used to create it.
|
1203
|
+
# That same speaker can then be used to create prompts for all custom models within
|
1204
|
+
# that service instance. No language is associated with a speaker model, but each
|
1205
|
+
# custom model has a single specified language. You can add prompts only to US
|
1206
|
+
# English models.
|
1207
|
+
#
|
1208
|
+
# You specify a name for the speaker when you create it. The name must be unique
|
1209
|
+
# among all speaker names for the owning service instance. To re-create a speaker
|
1210
|
+
# model for an existing speaker name, you must first delete the existing speaker
|
1211
|
+
# model that has that name.
|
1212
|
+
#
|
1213
|
+
# Speaker enrollment is a synchronous operation. Although it accepts more audio data
|
1214
|
+
# than a prompt, the process of adding a speaker is very fast. The service simply
|
1215
|
+
# extracts information about the speaker's voice from the audio. Unlike prompts,
|
1216
|
+
# speaker models neither need nor accept a transcription of the audio. When the call
|
1217
|
+
# returns, the audio is fully processed and the speaker enrollment is complete.
|
1218
|
+
#
|
1219
|
+
# The service returns a speaker ID with the request. A speaker ID is a globally unique
|
1220
|
+
# identifier (GUID) that you use to identify the speaker in subsequent requests to
|
1221
|
+
# the service.
|
1222
|
+
#
|
1223
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1224
|
+
# functionality that is supported only for use with US English custom models and
|
1225
|
+
# voices.
|
1226
|
+
#
|
1227
|
+
# **See also:**
|
1228
|
+
# * [Create a speaker
|
1229
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-speaker-model)
|
1230
|
+
# * [Rules for creating speaker
|
1231
|
+
# models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-speakers).
|
1232
|
+
# @param speaker_name [String] The name of the speaker that is to be added to the service instance.
|
1233
|
+
# * Include a maximum of 49 characters in the name.
|
1234
|
+
# * Include only alphanumeric characters and `_` (underscores) in the name.
|
1235
|
+
# * Do not include XML sensitive characters (double quotes, single quotes,
|
1236
|
+
# ampersands, angle brackets, and slashes) in the name.
|
1237
|
+
# * Do not use the name of an existing speaker that is already defined for the
|
1238
|
+
# service instance.
|
1239
|
+
# @param audio [File] An enrollment audio file that contains a sample of the speaker's voice.
|
1240
|
+
# * The enrollment audio must be in WAV format and must have a minimum sampling rate
|
1241
|
+
# of 16 kHz. The service accepts audio with higher sampling rates. It transcodes all
|
1242
|
+
# audio to 16 kHz before processing it.
|
1243
|
+
# * The length of the enrollment audio is limited to 1 minute. Speaking one or two
|
1244
|
+
# paragraphs of text that include five to ten sentences is recommended.
|
1245
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1246
|
+
def create_speaker_model(speaker_name:, audio:)
  # Both arguments are required; nil is rejected before any request is built.
  raise ArgumentError.new("speaker_name must be provided") if speaker_name.nil?

  raise ArgumentError.new("audio must be provided") if audio.nil?

  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "create_speaker_model"))
  # Assigned after the merge so this value replaces any "Content-Type" entry
  # that the SDK headers may have supplied.
  headers["Content-Type"] = "audio/wav"

  # The speaker name travels as a query parameter; the audio is the request body.
  params = {
    "speaker_name" => speaker_name
  }

  # The value returned by `request` is handed back to the caller unchanged.
  request(
    method: "POST",
    url: "/v1/speakers",
    headers: headers,
    params: params,
    data: audio,
    accept_json: true
  )
end
|
1275
|
+
|
1276
|
+
##
|
1277
|
+
# @!method get_speaker_model(speaker_id:)
|
1278
|
+
# Get a speaker model.
|
1279
|
+
# Gets information about all prompts that are defined by a specified speaker for all
|
1280
|
+
# custom models that are owned by a service instance. The information is grouped by
|
1281
|
+
# the customization IDs of the custom models. For each custom model, the information
|
1282
|
+
# lists information about each prompt that is defined for that custom model by the
|
1283
|
+
# speaker. You must use credentials for the instance of the service that owns a
|
1284
|
+
# speaker model to list its prompts.
|
1285
|
+
#
|
1286
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1287
|
+
# functionality that is supported only for use with US English custom models and
|
1288
|
+
# voices.
|
1289
|
+
#
|
1290
|
+
# **See also:** [Listing the custom prompts for a speaker
|
1291
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list-prompts).
|
1292
|
+
# @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
|
1293
|
+
# credentials for the instance of the service that owns the speaker model.
|
1294
|
+
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1295
|
+
def get_speaker_model(speaker_id:)
  # The ID is substituted into the request path below, so nil is rejected first.
  raise ArgumentError.new("speaker_id must be provided") if speaker_id.nil?

  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "get_speaker_model"))

  # The path segment is percent-encoded before substitution.
  method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)]

  # The value returned by `request` is handed back to the caller unchanged.
  request(
    method: "GET",
    url: method_url,
    headers: headers,
    accept_json: true
  )
end
|
1313
|
+
|
1314
|
+
##
|
1315
|
+
# @!method delete_speaker_model(speaker_id:)
|
1316
|
+
# Delete a speaker model.
|
1317
|
+
# Deletes an existing speaker model from the service instance. The service deletes
|
1318
|
+
# the enrolled speaker with the specified speaker ID. You must use credentials for
|
1319
|
+
# the instance of the service that owns a speaker model to delete the speaker.
|
1320
|
+
#
|
1321
|
+
# Any prompts that are associated with the deleted speaker are not affected by the
|
1322
|
+
# speaker's deletion. The prosodic data that defines the quality of a prompt is
|
1323
|
+
# established when the prompt is created. A prompt is static and remains unaffected
|
1324
|
+
# by deletion of its associated speaker. However, the prompt cannot be resubmitted
|
1325
|
+
# or updated with its original speaker once that speaker is deleted.
|
1326
|
+
#
|
1327
|
+
# **Beta:** Speaker models and the custom prompts with which they are used are beta
|
1328
|
+
# functionality that is supported only for use with US English custom models and
|
1329
|
+
# voices.
|
1330
|
+
#
|
1331
|
+
# **See also:** [Deleting a speaker
|
1332
|
+
# model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-delete).
|
1333
|
+
# @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
|
1334
|
+
# credentials for the instance of the service that owns the speaker model.
|
1335
|
+
# @return [nil]
|
1336
|
+
def delete_speaker_model(speaker_id:)
  # The ID is substituted into the request path below, so nil is rejected first.
  raise ArgumentError.new("speaker_id must be provided") if speaker_id.nil?

  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "delete_speaker_model"))

  # The path segment is percent-encoded before substitution.
  method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)]

  request(
    method: "DELETE",
    url: method_url,
    headers: headers,
    accept_json: false
  )
  # The result of the DELETE call is intentionally discarded; callers get nil.
  nil
end
|
1354
|
+
#########################
|
775
1355
|
# User data
|
776
1356
|
#########################
|
777
1357
|
|