ibm_watson 2.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,7 +14,7 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
  #
17
- # IBM OpenAPI SDK Code Generator Version: 3.19.0-be3b4618-20201113-200858
17
+ # IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
18
18
  #
19
19
  # The IBM Watson™ Text to Speech service provides APIs that use IBM's
20
20
  # speech-synthesis capabilities to synthesize text into natural-sounding speech in a
@@ -33,8 +33,12 @@
33
33
  # that, when combined, sound like the word. A phonetic translation is based on the SSML
34
34
  # phoneme format for representing a word. You can specify a phonetic translation in
35
35
  # standard International Phonetic Alphabet (IPA) representation or in the proprietary IBM
36
- # Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch, and Korean languages
37
- # support only IPA.
36
+ # Symbolic Phonetic Representation (SPR). The Arabic, Chinese, Dutch, Australian English,
37
+ # and Korean languages support only IPA.
38
+ #
39
+ # The service also offers a Tune by Example feature that lets you define custom prompts.
40
+ # You can also define speaker models to improve the quality of your custom prompts. The
41
+ # service supports custom prompts only for US English custom models and voices.
38
42
 
39
43
  require "concurrent"
40
44
  require "erb"
@@ -42,7 +46,6 @@ require "json"
42
46
  require "ibm_cloud_sdk_core"
43
47
  require_relative "./common.rb"
44
48
 
45
- # Module for the Watson APIs
46
49
  module IBMWatson
47
50
  ##
48
51
  # The Text to Speech V1 service.
@@ -117,7 +120,33 @@ module IBMWatson
117
120
  #
118
121
  # **See also:** [Listing a specific
119
122
  # voice](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices#listVoice).
120
- # @param voice [String] The voice for which information is to be returned.
123
+ #
124
+ #
125
+ # ### Important voice updates
126
+ #
127
+ # The service's voices underwent significant change on 2 December 2020.
128
+ # * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
129
+ # instead of concatenative.
130
+ # * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
131
+ # * The `ar-AR` language identifier cannot be used to create a custom model. Use the
132
+ # `ar-MS` identifier instead.
133
+ # * The standard concatenative voices for the following languages are now
134
+ # deprecated: Brazilian Portuguese, United Kingdom and United States English,
135
+ # French, German, Italian, Japanese, and Spanish (all dialects).
136
+ # * The features expressive SSML, voice transformation SSML, and use of the `volume`
137
+ # attribute of the `<prosody>` element are deprecated and are not supported with any
138
+ # of the service's neural voices.
139
+ # * All of the service's voices are now customizable and generally available (GA)
140
+ # for production use.
141
+ #
142
+ # The deprecated voices and features will continue to function for at least one year
143
+ # but might be removed at a future date. You are encouraged to migrate to the
144
+ # equivalent neural voices at your earliest convenience. For more information about
145
+ # all voice updates, see the [2 December 2020 service
146
+ # update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
147
+ # in the release notes.
148
+ # @param voice [String] The voice for which information is to be returned. For more information about
149
+ # specifying a voice, see **Important voice updates** in the method description.
121
150
  # @param customization_id [String] The customization ID (GUID) of a custom model for which information is to be
122
151
  # returned. You must make the request with credentials for the instance of the
123
152
  # service that owns the custom model. Omit the parameter to see information about
@@ -213,6 +242,30 @@ module IBMWatson
213
242
  # formats](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-audioFormats#audioFormats).
214
243
  #
215
244
  #
245
+ # ### Important voice updates
246
+ #
247
+ # The service's voices underwent significant change on 2 December 2020.
248
+ # * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
249
+ # instead of concatenative.
250
+ # * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
251
+ # * The `ar-AR` language identifier cannot be used to create a custom model. Use the
252
+ # `ar-MS` identifier instead.
253
+ # * The standard concatenative voices for the following languages are now
254
+ # deprecated: Brazilian Portuguese, United Kingdom and United States English,
255
+ # French, German, Italian, Japanese, and Spanish (all dialects).
256
+ # * The features expressive SSML, voice transformation SSML, and use of the `volume`
257
+ # attribute of the `<prosody>` element are deprecated and are not supported with any
258
+ # of the service's neural voices.
259
+ # * All of the service's voices are now customizable and generally available (GA)
260
+ # for production use.
261
+ #
262
+ # The deprecated voices and features will continue to function for at least one year
263
+ # but might be removed at a future date. You are encouraged to migrate to the
264
+ # equivalent neural voices at your earliest convenience. For more information about
265
+ # all voice updates, see the [2 December 2020 service
266
+ # update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
267
+ # in the release notes.
268
+ #
216
269
  # ### Warning messages
217
270
  #
218
271
  # If a request includes invalid query parameters, the service returns a `Warnings`
@@ -226,7 +279,8 @@ module IBMWatson
226
279
  # the `accept` parameter to specify the audio format. For more information about
227
280
  # specifying an audio format, see **Audio formats (accept types)** in the method
228
281
  # description.
229
- # @param voice [String] The voice to use for synthesis.
282
+ # @param voice [String] The voice to use for synthesis. For more information about specifying a voice, see
283
+ # **Important voice updates** in the method description.
230
284
  # @param customization_id [String] The customization ID (GUID) of a custom model to use for the synthesis. If a
231
285
  # custom model is specified, it works only if it matches the language of the
232
286
  # indicated voice. You must make the request with credentials for the instance of
@@ -277,13 +331,39 @@ module IBMWatson
277
331
  #
278
332
  # **See also:** [Querying a word from a
279
333
  # language](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsQueryLanguage).
334
+ #
335
+ #
336
+ # ### Important voice updates
337
+ #
338
+ # The service's voices underwent significant change on 2 December 2020.
339
+ # * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
340
+ # instead of concatenative.
341
+ # * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
342
+ # * The `ar-AR` language identifier cannot be used to create a custom model. Use the
343
+ # `ar-MS` identifier instead.
344
+ # * The standard concatenative voices for the following languages are now
345
+ # deprecated: Brazilian Portuguese, United Kingdom and United States English,
346
+ # French, German, Italian, Japanese, and Spanish (all dialects).
347
+ # * The features expressive SSML, voice transformation SSML, and use of the `volume`
348
+ # attribute of the `<prosody>` element are deprecated and are not supported with any
349
+ # of the service's neural voices.
350
+ # * All of the service's voices are now customizable and generally available (GA)
351
+ # for production use.
352
+ #
353
+ # The deprecated voices and features will continue to function for at least one year
354
+ # but might be removed at a future date. You are encouraged to migrate to the
355
+ # equivalent neural voices at your earliest convenience. For more information about
356
+ # all voice updates, see the [2 December 2020 service
357
+ # update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
358
+ # in the release notes.
280
359
  # @param text [String] The word for which the pronunciation is requested.
281
360
  # @param voice [String] A voice that specifies the language in which the pronunciation is to be returned.
282
361
  # All voices for the same language (for example, `en-US`) return the same
283
- # translation.
362
+ # translation. For more information about specifying a voice, see **Important voice
363
+ # updates** in the method description.
284
364
  # @param format [String] The phoneme format in which to return the pronunciation. The Arabic, Chinese,
285
- # Dutch, and Korean languages support only IPA. Omit the parameter to obtain the
286
- # pronunciation in the default format.
365
+ # Dutch, Australian English, and Korean languages support only IPA. Omit the
366
+ # parameter to obtain the pronunciation in the default format.
287
367
  # @param customization_id [String] The customization ID (GUID) of a custom model for which the pronunciation is to be
288
368
  # returned. The language of a specified custom model must match the language of the
289
369
  # specified voice. If the word is not defined in the specified custom model, the
@@ -332,11 +412,37 @@ module IBMWatson
332
412
  #
333
413
  # **See also:** [Creating a custom
334
414
  # model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsCreate).
415
+ #
416
+ #
417
+ # ### Important voice updates
418
+ #
419
+ # The service's voices underwent significant change on 2 December 2020.
420
+ # * The Arabic, Chinese, Dutch, Australian English, and Korean voices are now neural
421
+ # instead of concatenative.
422
+ # * The `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead.
423
+ # * The `ar-AR` language identifier cannot be used to create a custom model. Use the
424
+ # `ar-MS` identifier instead.
425
+ # * The standard concatenative voices for the following languages are now
426
+ # deprecated: Brazilian Portuguese, United Kingdom and United States English,
427
+ # French, German, Italian, Japanese, and Spanish (all dialects).
428
+ # * The features expressive SSML, voice transformation SSML, and use of the `volume`
429
+ # attribute of the `<prosody>` element are deprecated and are not supported with any
430
+ # of the service's neural voices.
431
+ # * All of the service's voices are now customizable and generally available (GA)
432
+ # for production use.
433
+ #
434
+ # The deprecated voices and features will continue to function for at least one year
435
+ # but might be removed at a future date. You are encouraged to migrate to the
436
+ # equivalent neural voices at your earliest convenience. For more information about
437
+ # all voice updates, see the [2 December 2020 service
438
+ # update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020)
439
+ # in the release notes.
335
440
  # @param name [String] The name of the new custom model.
336
441
  # @param language [String] The language of the new custom model. You create a custom model for a specific
337
- # language, not for a specific voice. A custom model can be used with any voice,
338
- # standard or neural, for its specified language. Omit the parameter to use the the
339
- # default language, `en-US`.
442
+ # language, not for a specific voice. A custom model can be used with any voice for
443
+ # its specified language. Omit the parameter to use the default language,
444
+ # `en-US`. **Note:** The `ar-AR` language identifier cannot be used to create a
445
+ # custom model. Use the `ar-MS` identifier instead.
340
446
  # @param description [String] A description of the new custom model. Specifying a description is recommended.
341
447
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
342
448
  def create_custom_model(name:, language: nil, description: nil)
@@ -370,8 +476,8 @@ module IBMWatson
370
476
  # List custom models.
371
477
  # Lists metadata such as the name and description for all custom models that are
372
478
  # owned by an instance of the service. Specify a language to list the custom models
373
- # for that language only. To see the words in addition to the metadata for a
374
- # specific custom model, use the **List a custom model** method. You must use
479
+ # for that language only. To see the words and prompts in addition to the metadata
480
+ # for a specific custom model, use the **Get a custom model** method. You must use
375
481
  # credentials for the instance of the service that owns a model to list information
376
482
  # about it.
377
483
  #
@@ -473,8 +579,9 @@ module IBMWatson
473
579
  # Get a custom model.
474
580
  # Gets all information about a specified custom model. In addition to metadata such
475
581
  # as the name and description of the custom model, the output includes the words and
476
- # their translations as defined in the model. To see just the metadata for a model,
477
- # use the **List custom models** method.
582
+ # their translations that are defined for the model, as well as any prompts that are
583
+ # defined for the model. To see just the metadata for a model, use the **List custom
584
+ # models** method.
478
585
  #
479
586
  # **See also:** [Querying a custom
480
587
  # model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQuery).
@@ -666,9 +773,9 @@ module IBMWatson
666
773
  # @param word [String] The word that is to be added or updated for the custom model.
667
774
  # @param translation [String] The phonetic or sounds-like translation for the word. A phonetic translation is
668
775
  # based on the SSML format for representing the phonetic string of a word either as
669
- # an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch, and
670
- # Korean languages support only IPA. A sounds-like is one or more words that, when
671
- # combined, sound like the word.
776
+ # an IPA translation or as an IBM SPR translation. The Arabic, Chinese, Dutch,
777
+ # Australian English, and Korean languages support only IPA. A sounds-like is one or
778
+ # more words that, when combined, sound like the word.
672
779
  # @param part_of_speech [String] **Japanese only.** The part of speech for the word. The service uses the value to
673
780
  # produce the correct intonation for the word. You can create only a single entry,
674
781
  # with or without a single part of speech, for any word; you cannot create multiple
@@ -772,6 +879,481 @@ module IBMWatson
772
879
  nil
773
880
  end
774
881
  #########################
882
+ # Custom prompts
883
+ #########################
884
+
885
+ ##
886
+ # @!method list_custom_prompts(customization_id:)
887
+ # List custom prompts.
888
+ # Lists information about all custom prompts that are defined for a custom model.
889
+ # The information includes the prompt ID, prompt text, status, and optional speaker
890
+ # ID for each prompt of the custom model. You must use credentials for the instance
891
+ # of the service that owns the custom model. The same information about all of the
892
+ # prompts for a custom model is also provided by the **Get a custom model** method.
893
+ # That method provides complete details about a specified custom model, including
894
+ # its language, owner, custom words, and more.
895
+ #
896
+ # **Beta:** Custom prompts are beta functionality that is supported only for use
897
+ # with US English custom models and voices.
898
+ #
899
+ # **See also:** [Listing custom
900
+ # prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
901
+ # @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
902
+ # credentials for the instance of the service that owns the custom model.
903
+ # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
904
# Fetches the custom prompts of one custom model via
# GET /v1/customizations/{customization_id}/prompts.
#
# @param customization_id [String] ID of the custom model; URL-encoded into the path.
# @return [Object] whatever `request` returns for the call (JSON is accepted).
# @raise [ArgumentError] if customization_id is nil.
def list_custom_prompts(customization_id:)
  raise ArgumentError, "customization_id must be provided" if customization_id.nil?

  # Start from an empty header set and layer the SDK analytics headers on top.
  headers = {}.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "list_custom_prompts"))

  encoded_id = ERB::Util.url_encode(customization_id)
  method_url = format("/v1/customizations/%s/prompts", encoded_id)

  request(method: "GET", url: method_url, headers: headers, accept_json: true)
end
922
+
923
+ ##
924
+ # @!method add_custom_prompt(customization_id:, prompt_id:, metadata:, file:, filename: nil)
925
+ # Add a custom prompt.
926
+ # Adds a custom prompt to a custom model. A prompt is defined by the text that is to
927
+ # be spoken, the audio for that text, a unique user-specified ID for the prompt, and
928
+ # an optional speaker ID. The information is used to generate prosodic data that is
929
+ # not visible to the user. This data is used by the service to produce the
930
+ # synthesized audio upon request. You must use credentials for the instance of the
931
+ # service that owns a custom model to add a prompt to it. You can add a maximum of
932
+ # 1000 custom prompts to a single custom model.
933
+ #
934
+ # You are recommended to assign meaningful values for prompt IDs. For example, use
935
+ # `goodbye` to identify a prompt that speaks a farewell message. Prompt IDs must be
936
+ # unique within a given custom model. You cannot define two prompts with the same
937
+ # name for the same custom model. If you provide the ID of an existing prompt, the
938
+ # previously uploaded prompt is replaced by the new information. The existing prompt
939
+ # is reprocessed by using the new text and audio and, if provided, new speaker
940
+ # model, and the prosody data associated with the prompt is updated.
941
+ #
942
+ # The quality of a prompt is undefined if the language of a prompt does not match
943
+ # the language of its custom model. This is consistent with any text or SSML that is
944
+ # specified for a speech synthesis request. The service makes a best-effort attempt
945
+ # to render the specified text for the prompt; it does not validate that the
946
+ # language of the text matches the language of the model.
947
+ #
948
+ # Adding a prompt is an asynchronous operation. Although it accepts less audio than
949
+ # speaker enrollment, the service must align the audio with the provided text. The
950
+ # time that it takes to process a prompt depends on the prompt itself. The
951
+ # processing time for a reasonably sized prompt generally matches the length of the
952
+ # audio (for example, it takes 20 seconds to process a 20-second prompt).
953
+ #
954
+ # For shorter prompts, you can wait for a reasonable amount of time and then check
955
+ # the status of the prompt with the **Get a custom prompt** method. For longer
956
+ # prompts, consider using that method to poll the service every few seconds to
957
+ # determine when the prompt becomes available. No prompt can be used for speech
958
+ # synthesis if it is in the `processing` or `failed` state. Only prompts that are in
959
+ # the `available` state can be used for speech synthesis.
960
+ #
961
+ # When it processes a request, the service attempts to align the text and the audio
962
+ # that are provided for the prompt. The text that is passed with a prompt must match
963
+ # the spoken audio as closely as possible. Optimally, the text and audio match
964
+ # exactly. The service does its best to align the specified text with the audio, and
965
+ # it can often compensate for mismatches between the two. But if the service cannot
966
+ # effectively align the text and the audio, possibly because the magnitude of
967
+ # mismatches between the two is too great, processing of the prompt fails.
968
+ #
969
+ # ### Evaluating a prompt
970
+ #
971
+ # Always listen to and evaluate a prompt to determine its quality before using it
972
+ # in production. To evaluate a prompt, include only the single prompt in a speech
973
+ # synthesis request by using the following SSML extension, in this case for a prompt
974
+ # whose ID is `goodbye`:
975
+ #
976
+ # `<ibm:prompt id="goodbye"/>`
977
+ #
978
+ # In some cases, you might need to rerecord and resubmit a prompt as many as five
979
+ # times to address the following possible problems:
980
+ # * The service might fail to detect a mismatch between the prompt's text and audio.
981
+ # The longer the prompt, the greater the chance for misalignment between its text
982
+ # and audio. Therefore, multiple shorter prompts are preferable to a single long
983
+ # prompt.
984
+ # * The text of a prompt might include a word that the service does not recognize.
985
+ # In this case, you can create a custom word and pronunciation pair to tell the
986
+ # service how to pronounce the word. You must then re-create the prompt.
987
+ # * The quality of the input audio might be insufficient or the service's processing
988
+ # of the audio might fail to detect the intended prosody. Submitting new audio for
989
+ # the prompt can correct these issues.
990
+ #
991
+ # If a prompt that is created without a speaker ID does not adequately reflect the
992
+ # intended prosody, enrolling the speaker and providing a speaker ID for the prompt
993
+ # is one recommended means of potentially improving the quality of the prompt. This
994
+ # is especially important for shorter prompts such as "good-bye" or "thank you,"
995
+ # where less audio data makes it more difficult to match the prosody of the speaker.
996
+ #
997
+ #
998
+ # **Beta:** Custom prompts are beta functionality that is supported only for use
999
+ # with US English custom models and voices.
1000
+ #
1001
+ # **See also:**
1002
+ # * [Add a custom
1003
+ # prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-add-prompt)
1004
+ # * [Evaluate a custom
1005
+ # prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-evaluate-prompt)
1006
+ # * [Rules for creating custom
1007
+ # prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-prompts).
1008
+ # @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
1009
+ # credentials for the instance of the service that owns the custom model.
1010
+ # @param prompt_id [String] The identifier of the prompt that is to be added to the custom model:
1011
+ # * Include a maximum of 49 characters in the ID.
1012
+ # * Include only alphanumeric characters and `_` (underscores) in the ID.
1013
+ # * Do not include XML sensitive characters (double quotes, single quotes,
1014
+ # ampersands, angle brackets, and slashes) in the ID.
1015
+ # * To add a new prompt, the ID must be unique for the specified custom model.
1016
+ # Otherwise, the new information for the prompt overwrites the existing prompt that
1017
+ # has that ID.
1018
+ # @param metadata [PromptMetadata] Information about the prompt that is to be added to a custom model. The following
1019
+ # example of a `PromptMetadata` object includes both the required prompt text and an
1020
+ # optional speaker model ID:
1021
+ #
1022
+ # `{ "prompt_text": "Thank you and good-bye!", "speaker_id":
1023
+ # "823068b2-ed4e-11ea-b6e0-7b6456aa95cc" }`.
1024
+ # @param file [File] An audio file that speaks the text of the prompt with intonation and prosody that
1025
+ # matches how you would like the prompt to be spoken.
1026
+ # * The prompt audio must be in WAV format and must have a minimum sampling rate of
1027
+ # 16 kHz. The service accepts audio with higher sampling rates. The service
1028
+ # transcodes all audio to 16 kHz before processing it.
1029
+ # * The length of the prompt audio is limited to 30 seconds.
1030
+ # @param filename [String] The filename for file.
1031
+ # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1032
# Adds (or replaces) a custom prompt on a custom model by POSTing a multipart
# form to /v1/customizations/{customization_id}/prompts/{prompt_id}.
#
# @param customization_id [String] ID of the custom model; URL-encoded into the path.
# @param prompt_id [String] ID of the prompt; URL-encoded into the path.
# @param metadata [Hash, String, #to_hash] Prompt metadata, sent as the
#   `application/json` form part. Hash-like values are serialized with `to_json`;
#   any other value is sent as its `to_s`, so an already-encoded JSON string is
#   passed through unchanged.
# @param file [#read, String, Object] Prompt audio, sent as the `audio/wav` form
#   part. Objects that respond to `read` are used as-is, a String is taken as the
#   raw audio bytes (not a path), and any other object is serialized with `to_json`.
# @param filename [String, nil] Name reported for the file part; falls back to
#   `file.path` when the file object exposes one.
# @return [Object] whatever `request` returns for the call (JSON is accepted).
# @raise [ArgumentError] if customization_id, prompt_id, metadata, or file is nil.
def add_custom_prompt(customization_id:, prompt_id:, metadata:, file:, filename: nil)
  raise ArgumentError, "customization_id must be provided" if customization_id.nil?
  raise ArgumentError, "prompt_id must be provided" if prompt_id.nil?
  raise ArgumentError, "metadata must be provided" if metadata.nil?
  raise ArgumentError, "file must be provided" if file.nil?

  headers = {}
  headers.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "add_custom_prompt"))

  # Hash#to_s produces Ruby inspect output (`{"a"=>1}`), which is not valid JSON
  # even though the part is labelled application/json. Serialize hash-like
  # metadata properly and keep the previous to_s behavior for everything else.
  metadata_json = metadata.respond_to?(:to_hash) ? metadata.to_hash.to_json : metadata.to_s

  # Duck-type on #read so Tempfile and other IO-like objects are streamed as-is
  # instead of being JSON-encoded. A String holds the audio bytes themselves, so
  # it is wrapped untouched; JSON-quoting it would corrupt the WAV payload.
  unless file.respond_to?(:read)
    file = StringIO.new(file.is_a?(String) ? file : file.to_json)
  end
  filename = file.path if filename.nil? && file.respond_to?(:path)

  form_data = {
    metadata: HTTP::FormData::Part.new(metadata_json, content_type: "application/json"),
    file: HTTP::FormData::File.new(file, content_type: "audio/wav", filename: filename)
  }

  method_url = format(
    "/v1/customizations/%s/prompts/%s",
    ERB::Util.url_encode(customization_id),
    ERB::Util.url_encode(prompt_id)
  )

  request(
    method: "POST",
    url: method_url,
    headers: headers,
    form: form_data,
    accept_json: true
  )
end
1067
+
1068
+ ##
1069
+ # @!method get_custom_prompt(customization_id:, prompt_id:)
1070
+ # Get a custom prompt.
1071
+ # Gets information about a specified custom prompt for a specified custom model. The
1072
+ # information includes the prompt ID, prompt text, status, and optional speaker ID
1073
+ # for each prompt of the custom model. You must use credentials for the instance of
1074
+ # the service that owns the custom model.
1075
+ #
1076
+ # **Beta:** Custom prompts are beta functionality that is supported only for use
1077
+ # with US English custom models and voices.
1078
+ #
1079
+ # **See also:** [Listing custom
1080
+ # prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list).
1081
+ # @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
1082
+ # credentials for the instance of the service that owns the custom model.
1083
+ # @param prompt_id [String] The identifier (name) of the prompt.
1084
+ # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1085
# Fetches one custom prompt of a custom model via
# GET /v1/customizations/{customization_id}/prompts/{prompt_id}.
#
# @param customization_id [String] ID of the custom model; URL-encoded into the path.
# @param prompt_id [String] ID of the prompt; URL-encoded into the path.
# @return [Object] whatever `request` returns for the call (JSON is accepted).
# @raise [ArgumentError] if customization_id or prompt_id is nil.
def get_custom_prompt(customization_id:, prompt_id:)
  raise ArgumentError, "customization_id must be provided" if customization_id.nil?
  raise ArgumentError, "prompt_id must be provided" if prompt_id.nil?

  # Start from an empty header set and layer the SDK analytics headers on top.
  headers = {}.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "get_custom_prompt"))

  # Both identifiers are user-supplied, so each path segment is encoded.
  encoded_segments = [customization_id, prompt_id].map { |segment| ERB::Util.url_encode(segment) }
  method_url = format("/v1/customizations/%s/prompts/%s", *encoded_segments)

  request(method: "GET", url: method_url, headers: headers, accept_json: true)
end
1105
+
1106
+ ##
1107
+ # @!method delete_custom_prompt(customization_id:, prompt_id:)
1108
+ # Delete a custom prompt.
1109
+ # Deletes an existing custom prompt from a custom model. The service deletes the
1110
+ # prompt with the specified ID. You must use credentials for the instance of the
1111
+ # service that owns the custom model from which the prompt is to be deleted.
1112
+ #
1113
+ # **Caution:** Deleting a custom prompt elicits a 400 response code from synthesis
1114
+ # requests that attempt to use the prompt. Make sure that you do not attempt to use
1115
+ # a deleted prompt in a production application.
1116
+ #
1117
+ # **Beta:** Custom prompts are beta functionality that is supported only for use
1118
+ # with US English custom models and voices.
1119
+ #
1120
+ # **See also:** [Deleting a custom
1121
+ # prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-delete).
1122
+ # @param customization_id [String] The customization ID (GUID) of the custom model. You must make the request with
1123
+ # credentials for the instance of the service that owns the custom model.
1124
+ # @param prompt_id [String] The identifier (name) of the prompt that is to be deleted.
1125
+ # @return [nil]
1126
# Removes one custom prompt from a custom model via
# DELETE /v1/customizations/{customization_id}/prompts/{prompt_id}.
#
# @param customization_id [String] ID of the custom model; URL-encoded into the path.
# @param prompt_id [String] ID of the prompt to delete; URL-encoded into the path.
# @return [nil] always; the result of `request` is discarded.
# @raise [ArgumentError] if customization_id or prompt_id is nil.
def delete_custom_prompt(customization_id:, prompt_id:)
  raise ArgumentError, "customization_id must be provided" if customization_id.nil?
  raise ArgumentError, "prompt_id must be provided" if prompt_id.nil?

  # Start from an empty header set and layer the SDK analytics headers on top.
  headers = {}.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "delete_custom_prompt"))

  encoded_segments = [customization_id, prompt_id].map { |segment| ERB::Util.url_encode(segment) }
  method_url = format("/v1/customizations/%s/prompts/%s", *encoded_segments)

  # No JSON body is requested for a delete; callers only get nil back.
  request(method: "DELETE", url: method_url, headers: headers, accept_json: false)
  nil
end
1146
+ #########################
1147
+ # Speaker models
1148
+ #########################
1149
+
1150
+ ##
1151
+ # @!method list_speaker_models
1152
+ # List speaker models.
1153
+ # Lists information about all speaker models that are defined for a service
1154
+ # instance. The information includes the speaker ID and speaker name of each defined
1155
+ # speaker. You must use credentials for the instance of a service to list its
1156
+ # speakers.
1157
+ #
1158
+ # **Beta:** Speaker models and the custom prompts with which they are used are beta
1159
+ # functionality that is supported only for use with US English custom models and
1160
+ # voices.
1161
+ #
1162
+ # **See also:** [Listing speaker
1163
+ # models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list).
1164
+ # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1165
# Fetches the speaker models of the service instance via GET /v1/speakers.
#
# @return [Object] whatever `request` returns for the call (JSON is accepted).
def list_speaker_models
  # Start from an empty header set and layer the SDK analytics headers on top.
  headers = {}.merge!(Common.new.get_sdk_headers("text_to_speech", "V1", "list_speaker_models"))

  request(method: "GET", url: "/v1/speakers", headers: headers, accept_json: true)
end
1181
+
1182
+ ##
1183
+ # @!method create_speaker_model(speaker_name:, audio:)
1184
+ # Create a speaker model.
1185
+ # Creates a new speaker model, which is an optional enrollment token for users who
1186
+ # are to add prompts to custom models. A speaker model contains information about a
1187
+ # user's voice. The service extracts this information from a WAV audio sample that
1188
+ # you pass as the body of the request. Associating a speaker model with a prompt is
1189
+ # optional, but the information that is extracted from the speaker model helps the
1190
+ # service learn about the speaker's voice.
1191
+ #
1192
+ # A speaker model can make an appreciable difference in the quality of prompts,
1193
+ # especially short prompts with relatively little audio, that are associated with
1194
+ # that speaker. A speaker model can help the service produce a prompt with more
1195
+ # confidence; the lack of a speaker model can potentially compromise the quality of
1196
+ # a prompt.
1197
+ #
1198
+ # The gender of the speaker who creates a speaker model does not need to match the
1199
+ # gender of a voice that is used with prompts that are associated with that speaker
1200
+ # model. For example, a speaker model that is created by a male speaker can be
1201
+ # associated with prompts that are spoken by female voices.
1202
+ #
1203
+ # You create a speaker model for a given instance of the service. The new speaker
1204
+ # model is owned by the service instance whose credentials are used to create it.
1205
+ # That same speaker can then be used to create prompts for all custom models within
1206
+ # that service instance. No language is associated with a speaker model, but each
1207
+ # custom model has a single specified language. You can add prompts only to US
1208
+ # English models.
1209
+ #
1210
+ # You specify a name for the speaker when you create it. The name must be unique
1211
+ # among all speaker names for the owning service instance. To re-create a speaker
1212
+ # model for an existing speaker name, you must first delete the existing speaker
1213
+ # model that has that name.
1214
+ #
1215
+ # Speaker enrollment is a synchronous operation. Although it accepts more audio data
1216
+ # than a prompt, the process of adding a speaker is very fast. The service simply
1217
+ # extracts information about the speaker's voice from the audio. Unlike prompts,
1218
+ # speaker models neither need nor accept a transcription of the audio. When the call
1219
+ # returns, the audio is fully processed and the speaker enrollment is complete.
1220
+ #
1221
+ # The service returns a speaker ID with the request. A speaker ID is a globally unique
1222
+ # identifier (GUID) that you use to identify the speaker in subsequent requests to
1223
+ # the service.
1224
+ #
1225
+ # **Beta:** Speaker models and the custom prompts with which they are used are beta
1226
+ # functionality that is supported only for use with US English custom models and
1227
+ # voices.
1228
+ #
1229
+ # **See also:**
1230
+ # * [Create a speaker
1231
+ # model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-speaker-model)
1232
+ # * [Rules for creating speaker
1233
+ # models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-speakers).
1234
+ # @param speaker_name [String] The name of the speaker that is to be added to the service instance.
1235
+ # * Include a maximum of 49 characters in the name.
1236
+ # * Include only alphanumeric characters and `_` (underscores) in the name.
1237
+ # * Do not include XML sensitive characters (double quotes, single quotes,
1238
+ # ampersands, angle brackets, and slashes) in the name.
1239
+ # * Do not use the name of an existing speaker that is already defined for the
1240
+ # service instance.
1241
+ # @param audio [File] An enrollment audio file that contains a sample of the speaker's voice.
1242
+ # * The enrollment audio must be in WAV format and must have a minimum sampling rate
1243
+ # of 16 kHz. The service accepts audio with higher sampling rates. It transcodes all
1244
+ # audio to 16 kHz before processing it.
1245
+ # * The length of the enrollment audio is limited to 1 minute. Speaking one or two
1246
+ # paragraphs of text that include five to ten sentences is recommended.
1247
+ # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1248
+ def create_speaker_model(speaker_name:, audio:) # Issues POST /v1/speakers with the audio as the request data.
1249
+ raise ArgumentError.new("speaker_name must be provided") if speaker_name.nil? # only nil is rejected here; no other validation of the name
1250
+
1251
+ raise ArgumentError.new("audio must be provided") if audio.nil? # only nil is rejected here; the audio content is not inspected
1252
+
1253
+ headers = { # starts empty; SDK headers and Content-Type are added below
1254
+ }
1255
+ sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "create_speaker_model")
1256
+ headers.merge!(sdk_headers)
1257
+
1258
+ params = { # NOTE(review): presumably sent as query parameters by request - confirm in ibm_cloud_sdk_core
1259
+ "speaker_name" => speaker_name
1260
+ }
1261
+
1262
+ data = audio # the audio object is passed through as-is
1263
+ headers["Content-Type"] = "audio/wav" # content type is fixed to WAV regardless of the audio argument
1264
+
1265
+ method_url = "/v1/speakers" # same path as list_speaker_models; the HTTP method differs
1266
+
1267
+ response = request(
1268
+ method: "POST",
1269
+ url: method_url,
1270
+ headers: headers,
1271
+ params: params,
1272
+ data: data,
1273
+ accept_json: true
1274
+ )
1275
+ response # whatever request returned is handed back to the caller unchanged
1276
+ end
1277
+
1278
+ ##
1279
+ # @!method get_speaker_model(speaker_id:)
1280
+ # Get a speaker model.
1281
+ # Gets information about all prompts that are defined by a specified speaker for all
1282
+ # custom models that are owned by a service instance. The information is grouped by
1283
+ # the customization IDs of the custom models. For each custom model, the information
1284
+ # lists information about each prompt that is defined for that custom model by the
1285
+ # speaker. You must use credentials for the instance of the service that owns a
1286
+ # speaker model to list its prompts.
1287
+ #
1288
+ # **Beta:** Speaker models and the custom prompts with which they are used are beta
1289
+ # functionality that is supported only for use with US English custom models and
1290
+ # voices.
1291
+ #
1292
+ # **See also:** [Listing the custom prompts for a speaker
1293
+ # model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list-prompts).
1294
+ # @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
1295
+ # credentials for the instance of the service that owns the speaker model.
1296
+ # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1297
+ def get_speaker_model(speaker_id:) # Issues GET /v1/speakers/{speaker_id}.
1298
+ raise ArgumentError.new("speaker_id must be provided") if speaker_id.nil? # only nil is rejected here
1299
+
1300
+ headers = { # starts empty; only the SDK headers below are added
1301
+ }
1302
+ sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "get_speaker_model")
1303
+ headers.merge!(sdk_headers)
1304
+
1305
+ method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)] # speaker_id is URL-encoded before being placed in the path
1306
+
1307
+ response = request(
1308
+ method: "GET",
1309
+ url: method_url,
1310
+ headers: headers,
1311
+ accept_json: true
1312
+ )
1313
+ response # whatever request returned is handed back to the caller unchanged
1314
+ end
1315
+
1316
+ ##
1317
+ # @!method delete_speaker_model(speaker_id:)
1318
+ # Delete a speaker model.
1319
+ # Deletes an existing speaker model from the service instance. The service deletes
1320
+ # the enrolled speaker with the specified speaker ID. You must use credentials for
1321
+ # the instance of the service that owns a speaker model to delete the speaker.
1322
+ #
1323
+ # Any prompts that are associated with the deleted speaker are not affected by the
1324
+ # speaker's deletion. The prosodic data that defines the quality of a prompt is
1325
+ # established when the prompt is created. A prompt is static and remains unaffected
1326
+ # by deletion of its associated speaker. However, the prompt cannot be resubmitted
1327
+ # or updated with its original speaker once that speaker is deleted.
1328
+ #
1329
+ # **Beta:** Speaker models and the custom prompts with which they are used are beta
1330
+ # functionality that is supported only for use with US English custom models and
1331
+ # voices.
1332
+ #
1333
+ # **See also:** [Deleting a speaker
1334
+ # model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-delete).
1335
+ # @param speaker_id [String] The speaker ID (GUID) of the speaker model. You must make the request with service
1336
+ # credentials for the instance of the service that owns the speaker model.
1337
+ # @return [nil]
1338
+ def delete_speaker_model(speaker_id:) # Issues DELETE /v1/speakers/{speaker_id}; always returns nil.
1339
+ raise ArgumentError.new("speaker_id must be provided") if speaker_id.nil? # only nil is rejected here
1340
+
1341
+ headers = { # starts empty; only the SDK headers below are added
1342
+ }
1343
+ sdk_headers = Common.new.get_sdk_headers("text_to_speech", "V1", "delete_speaker_model")
1344
+ headers.merge!(sdk_headers)
1345
+
1346
+ method_url = "/v1/speakers/%s" % [ERB::Util.url_encode(speaker_id)] # speaker_id is URL-encoded before being placed in the path
1347
+
1348
+ request( # the return value of request is intentionally not captured
1349
+ method: "DELETE",
1350
+ url: method_url,
1351
+ headers: headers,
1352
+ accept_json: false
1353
+ )
1354
+ nil # explicit nil so the method's return matches the documented @return [nil]
1355
+ end
1356
+ #########################
775
1357
  # User data
776
1358
  #########################
777
1359