ibm_watson 1.3.0 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +33 -5
  3. data/lib/ibm_watson/assistant_v1.rb +225 -199
  4. data/lib/ibm_watson/assistant_v2.rb +228 -21
  5. data/lib/ibm_watson/compare_comply_v1.rb +43 -24
  6. data/lib/ibm_watson/discovery_v1.rb +144 -19
  7. data/lib/ibm_watson/discovery_v2.rb +742 -23
  8. data/lib/ibm_watson/language_translator_v3.rb +216 -64
  9. data/lib/ibm_watson/natural_language_classifier_v1.rb +11 -3
  10. data/lib/ibm_watson/natural_language_understanding_v1.rb +32 -26
  11. data/lib/ibm_watson/personality_insights_v3.rb +22 -14
  12. data/lib/ibm_watson/speech_to_text_v1.rb +240 -106
  13. data/lib/ibm_watson/text_to_speech_v1.rb +139 -146
  14. data/lib/ibm_watson/tone_analyzer_v3.rb +19 -14
  15. data/lib/ibm_watson/version.rb +1 -1
  16. data/lib/ibm_watson/visual_recognition_v3.rb +31 -14
  17. data/lib/ibm_watson/visual_recognition_v4.rb +112 -22
  18. data/test/integration/test_assistant_v1.rb +9 -0
  19. data/test/integration/test_assistant_v2.rb +34 -0
  20. data/test/integration/test_compare_comply_v1.rb +1 -12
  21. data/test/integration/test_discovery_v2.rb +118 -6
  22. data/test/integration/test_language_translator_v3.rb +5 -0
  23. data/test/integration/test_speech_to_text_v1.rb +2 -0
  24. data/test/integration/test_text_to_speech_v1.rb +3 -3
  25. data/test/integration/test_visual_recognition_v4.rb +9 -0
  26. data/test/unit/test_assistant_v1.rb +149 -98
  27. data/test/unit/test_assistant_v2.rb +153 -8
  28. data/test/unit/test_compare_comply_v1.rb +20 -20
  29. data/test/unit/test_discovery_v1.rb +125 -125
  30. data/test/unit/test_discovery_v2.rb +262 -29
  31. data/test/unit/test_language_translator_v3.rb +85 -24
  32. data/test/unit/test_natural_language_classifier_v1.rb +17 -17
  33. data/test/unit/test_natural_language_understanding_v1.rb +10 -10
  34. data/test/unit/test_personality_insights_v3.rb +14 -14
  35. data/test/unit/test_speech_to_text_v1.rb +97 -97
  36. data/test/unit/test_text_to_speech_v1.rb +48 -48
  37. data/test/unit/test_tone_analyzer_v3.rb +12 -12
  38. data/test/unit/test_visual_recognition_v3.rb +16 -16
  39. data/test/unit/test_visual_recognition_v4.rb +56 -38
  40. metadata +5 -5
@@ -13,14 +13,16 @@
13
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
-
16
+ #
17
+ # IBM OpenAPI SDK Code Generator Version: 3.17.0-8d569e8f-20201030-142059
18
+ #
17
19
  # Analyze various features of text content at scale. Provide text, raw HTML, or a public
18
20
  # URL and IBM Watson Natural Language Understanding will give you results for the features
19
21
  # you request. The service cleans HTML content before analysis by default, so the results
20
22
  # can ignore most advertisements and other unwanted content.
21
23
  #
22
24
  # You can create [custom
23
- # models](https://cloud.ibm.com/docs/services/natural-language-understanding?topic=natural-language-understanding-customizing)
25
+ # models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
24
26
  # with Watson Knowledge Studio to detect custom entities and relations in Natural Language
25
27
  # Understanding.
26
28
 
@@ -36,37 +38,36 @@ module IBMWatson
36
38
  # The Natural Language Understanding V1 service.
37
39
  class NaturalLanguageUnderstandingV1 < IBMCloudSdkCore::BaseService
38
40
  include Concurrent::Async
41
+ DEFAULT_SERVICE_NAME = "natural_language_understanding"
42
+ DEFAULT_SERVICE_URL = "https://api.us-south.natural-language-understanding.watson.cloud.ibm.com"
43
+ attr_accessor :version
39
44
  ##
40
45
  # @!method initialize(args)
41
46
  # Construct a new client for the Natural Language Understanding service.
42
47
  #
43
48
  # @param args [Hash] The args to initialize with
44
- # @option args version [String] The API version date to use with the service, in
45
- # "YYYY-MM-DD" format. Whenever the API is changed in a backwards
46
- # incompatible way, a new minor version of the API is released.
47
- # The service uses the API version for the date you specify, or
48
- # the most recent version before that date. Note that you should
49
- # not programmatically specify the current date at runtime, in
50
- # case the API has been updated since your application's release.
51
- # Instead, specify a version date that is compatible with your
52
- # application, and don't change it until your application is
53
- # ready for a later version.
49
+ # @option args version [String] Release date of the API version you want to use. Specify dates in YYYY-MM-DD
50
+ # format. The current version is `2020-08-01`.
54
51
  # @option args service_url [String] The base service URL to use when contacting the service.
55
52
  # The base service_url may differ between IBM Cloud regions.
56
53
  # @option args authenticator [Object] The Authenticator instance to be configured for this service.
54
+ # @option args service_name [String] The name of the service to configure. Will be used as the key to load
55
+ # any external configuration, if applicable.
57
56
  def initialize(args = {})
58
57
  @__async_initialized__ = false
59
58
  defaults = {}
60
- defaults[:version] = nil
61
- defaults[:service_url] = "https://gateway.watsonplatform.net/natural-language-understanding/api"
59
+ defaults[:service_url] = DEFAULT_SERVICE_URL
60
+ defaults[:service_name] = DEFAULT_SERVICE_NAME
62
61
  defaults[:authenticator] = nil
62
+ defaults[:version] = nil
63
+ user_service_url = args[:service_url] unless args[:service_url].nil?
63
64
  args = defaults.merge(args)
64
65
  @version = args[:version]
65
66
  raise ArgumentError.new("version must be provided") if @version.nil?
66
67
 
67
- args[:service_name] = "natural_language_understanding"
68
68
  args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
69
69
  super
70
+ @service_url = user_service_url unless user_service_url.nil?
70
71
  end
71
72
 
72
73
  #########################
@@ -86,11 +87,12 @@ module IBMWatson
86
87
  # - Relations
87
88
  # - Semantic roles
88
89
  # - Sentiment
89
- # - Syntax (Experimental).
90
+ # - Syntax
91
+ # - Summarization (Experimental)
90
92
  #
91
93
  # If a language for the input text is not specified with the `language` parameter,
92
94
  # the service [automatically detects the
93
- # language](https://cloud.ibm.com/docs/services/natural-language-understanding?topic=natural-language-understanding-detectable-languages).
95
+ # language](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-detectable-languages).
94
96
  # @param features [Features] Specific features to analyze the document for.
95
97
  # @param text [String] The plain text to analyze. One of the `text`, `html`, or `url` parameters is
96
98
  # required.
@@ -98,12 +100,11 @@ module IBMWatson
98
100
  # required.
99
101
  # @param url [String] The webpage to analyze. One of the `text`, `html`, or `url` parameters is
100
102
  # required.
101
- # @param clean [Boolean] Set this to `false` to disable webpage cleaning. To learn more about webpage
102
- # cleaning, see the [Analyzing
103
- # webpages](https://cloud.ibm.com/docs/services/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages)
104
- # documentation.
103
+ # @param clean [Boolean] Set this to `false` to disable webpage cleaning. For more information about
104
+ # webpage cleaning, see [Analyzing
105
+ # webpages](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages).
105
106
  # @param xpath [String] An [XPath
106
- # query](https://cloud.ibm.com/docs/services/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages#xpath)
107
+ # query](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages#xpath)
107
108
  # to perform on `html` or `url` input. Results of the query will be appended to the
108
109
  # cleaned webpage text before it is analyzed. To analyze only the results of the
109
110
  # XPath query, set the `clean` parameter to `false`.
@@ -111,12 +112,13 @@ module IBMWatson
111
112
  # @param return_analyzed_text [Boolean] Whether or not to return the analyzed text.
112
113
  # @param language [String] ISO 639-1 code that specifies the language of your text. This overrides automatic
113
114
  # language detection. Language support differs depending on the features you include
114
- # in your analysis. See [Language
115
- # support](https://cloud.ibm.com/docs/services/natural-language-understanding?topic=natural-language-understanding-language-support)
116
- # for more information.
115
+ # in your analysis. For more information, see [Language
116
+ # support](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-language-support).
117
117
  # @param limit_text_characters [Fixnum] Sets the maximum number of characters that are processed by the service.
118
118
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
119
119
  def analyze(features:, text: nil, html: nil, url: nil, clean: nil, xpath: nil, fallback_to_raw: nil, return_analyzed_text: nil, language: nil, limit_text_characters: nil)
120
+ raise ArgumentError.new("version must be provided") if version.nil?
121
+
120
122
  raise ArgumentError.new("features must be provided") if features.nil?
121
123
 
122
124
  headers = {
@@ -161,10 +163,12 @@ module IBMWatson
161
163
  # @!method list_models
162
164
  # List models.
163
165
  # Lists Watson Knowledge Studio [custom entities and relations
164
- # models](https://cloud.ibm.com/docs/services/natural-language-understanding?topic=natural-language-understanding-customizing)
166
+ # models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
165
167
  # that are deployed to your Natural Language Understanding service.
166
168
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
167
169
  def list_models
170
+ raise ArgumentError.new("version must be provided") if version.nil?
171
+
168
172
  headers = {
169
173
  }
170
174
  sdk_headers = Common.new.get_sdk_headers("natural-language-understanding", "V1", "list_models")
@@ -193,6 +197,8 @@ module IBMWatson
193
197
  # @param model_id [String] Model ID of the model to delete.
194
198
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
195
199
  def delete_model(model_id:)
200
+ raise ArgumentError.new("version must be provided") if version.nil?
201
+
196
202
  raise ArgumentError.new("model_id must be provided") if model_id.nil?
197
203
 
198
204
  headers = {
@@ -13,7 +13,9 @@
13
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
-
16
+ #
17
+ # IBM OpenAPI SDK Code Generator Version: 3.17.0-8d569e8f-20201030-142059
18
+ #
17
19
  # The IBM Watson&trade; Personality Insights service enables applications to derive
18
20
  # insights from social media, enterprise data, or other digital communications. The
19
21
  # service uses linguistic analytics to infer individuals' intrinsic personality
@@ -47,37 +49,41 @@ module IBMWatson
47
49
  # The Personality Insights V3 service.
48
50
  class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
49
51
  include Concurrent::Async
52
+ DEFAULT_SERVICE_NAME = "personality_insights"
53
+ DEFAULT_SERVICE_URL = "https://api.us-south.personality-insights.watson.cloud.ibm.com"
54
+ attr_accessor :version
50
55
  ##
51
56
  # @!method initialize(args)
52
57
  # Construct a new client for the Personality Insights service.
53
58
  #
54
59
  # @param args [Hash] The args to initialize with
55
- # @option args version [String] The API version date to use with the service, in
56
- # "YYYY-MM-DD" format. Whenever the API is changed in a backwards
57
- # incompatible way, a new minor version of the API is released.
58
- # The service uses the API version for the date you specify, or
59
- # the most recent version before that date. Note that you should
60
- # not programmatically specify the current date at runtime, in
61
- # case the API has been updated since your application's release.
62
- # Instead, specify a version date that is compatible with your
63
- # application, and don't change it until your application is
64
- # ready for a later version.
60
+ # @option args version [String] Release date of the version of the API you want to use. Specify dates in
61
+ # YYYY-MM-DD format. The current version is `2017-10-13`.
65
62
  # @option args service_url [String] The base service URL to use when contacting the service.
66
63
  # The base service_url may differ between IBM Cloud regions.
67
64
  # @option args authenticator [Object] The Authenticator instance to be configured for this service.
65
+ # @option args service_name [String] The name of the service to configure. Will be used as the key to load
66
+ # any external configuration, if applicable.
67
+ #
68
+ # @deprecated On 1 December 2021, Personality Insights will no longer be available.
69
+ # Consider migrating to Watson Natural Language Understanding.
70
+ # For more information, see [Personality Insights Deprecation](https://github.com/watson-developer-cloud/ruby-sdk/tree/master#personality-insights-deprecation).
68
71
  def initialize(args = {})
72
+ warn "On 1 December 2021, Personality Insights will no longer be available. For more information, see the README."
69
73
  @__async_initialized__ = false
70
74
  defaults = {}
71
- defaults[:version] = nil
72
- defaults[:service_url] = "https://gateway.watsonplatform.net/personality-insights/api"
75
+ defaults[:service_url] = DEFAULT_SERVICE_URL
76
+ defaults[:service_name] = DEFAULT_SERVICE_NAME
73
77
  defaults[:authenticator] = nil
78
+ defaults[:version] = nil
79
+ user_service_url = args[:service_url] unless args[:service_url].nil?
74
80
  args = defaults.merge(args)
75
81
  @version = args[:version]
76
82
  raise ArgumentError.new("version must be provided") if @version.nil?
77
83
 
78
- args[:service_name] = "personality_insights"
79
84
  args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
80
85
  super
86
+ @service_url = user_service_url unless user_service_url.nil?
81
87
  end
82
88
 
83
89
  #########################
@@ -164,6 +170,8 @@ module IBMWatson
164
170
  # default, no consumption preferences are returned.
165
171
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
166
172
  def profile(content:, accept:, content_type: nil, content_language: nil, accept_language: nil, raw_scores: nil, csv_headers: nil, consumption_preferences: nil)
173
+ raise ArgumentError.new("version must be provided") if version.nil?
174
+
167
175
  raise ArgumentError.new("content must be provided") if content.nil?
168
176
 
169
177
  raise ArgumentError.new("accept must be provided") if accept.nil?
@@ -13,13 +13,15 @@
13
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
-
17
- # The IBM&reg; Speech to Text service provides APIs that use IBM's speech-recognition
18
- # capabilities to produce transcripts of spoken audio. The service can transcribe speech
19
- # from various languages and audio formats. In addition to basic transcription, the
20
- # service can produce detailed information about many different aspects of the audio. For
21
- # most languages, the service supports two sampling rates, broadband and narrowband. It
22
- # returns all JSON response content in the UTF-8 character set.
16
+ #
17
+ # IBM OpenAPI SDK Code Generator Version: 3.17.0-8d569e8f-20201030-142059
18
+ #
19
+ # The IBM Watson&trade; Speech to Text service provides APIs that use IBM's
20
+ # speech-recognition capabilities to produce transcripts of spoken audio. The service can
21
+ # transcribe speech from various languages and audio formats. In addition to basic
22
+ # transcription, the service can produce detailed information about many different aspects
23
+ # of the audio. For most languages, the service supports two sampling rates, broadband and
24
+ # narrowband. It returns all JSON response content in the UTF-8 character set.
23
25
  #
24
26
  # For speech recognition, the service supports synchronous and asynchronous HTTP
25
27
  # Representational State Transfer (REST) interfaces. It also supports a WebSocket
@@ -34,9 +36,9 @@
34
36
  # is a formal language specification that lets you restrict the phrases that the service
35
37
  # can recognize.
36
38
  #
37
- # Language model customization is generally available for production use with most
38
- # supported languages. Acoustic model customization is beta functionality that is
39
- # available for all supported languages.
39
+ # Language model customization and acoustic model customization are generally available
40
+ # for production use with all language models that are generally available. Grammars are
41
+ # beta functionality for all language models that support language model customization.
40
42
 
41
43
  require "concurrent"
42
44
  require "erb"
@@ -50,6 +52,8 @@ module IBMWatson
50
52
  # The Speech to Text V1 service.
51
53
  class SpeechToTextV1 < IBMCloudSdkCore::BaseService
52
54
  include Concurrent::Async
55
+ DEFAULT_SERVICE_NAME = "speech_to_text"
56
+ DEFAULT_SERVICE_URL = "https://api.us-south.speech-to-text.watson.cloud.ibm.com"
53
57
  ##
54
58
  # @!method initialize(args)
55
59
  # Construct a new client for the Speech to Text service.
@@ -58,15 +62,19 @@ module IBMWatson
58
62
  # @option args service_url [String] The base service URL to use when contacting the service.
59
63
  # The base service_url may differ between IBM Cloud regions.
60
64
  # @option args authenticator [Object] The Authenticator instance to be configured for this service.
65
+ # @option args service_name [String] The name of the service to configure. Will be used as the key to load
66
+ # any external configuration, if applicable.
61
67
  def initialize(args = {})
62
68
  @__async_initialized__ = false
63
69
  defaults = {}
64
- defaults[:service_url] = "https://stream.watsonplatform.net/speech-to-text/api"
70
+ defaults[:service_url] = DEFAULT_SERVICE_URL
71
+ defaults[:service_name] = DEFAULT_SERVICE_NAME
65
72
  defaults[:authenticator] = nil
73
+ user_service_url = args[:service_url] unless args[:service_url].nil?
66
74
  args = defaults.merge(args)
67
- args[:service_name] = "speech_to_text"
68
75
  args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
69
76
  super
77
+ @service_url = user_service_url unless user_service_url.nil?
70
78
  end
71
79
 
72
80
  #########################
@@ -78,7 +86,8 @@ module IBMWatson
78
86
  # List models.
79
87
  # Lists all language models that are available for use with the service. The
80
88
  # information includes the name of the model and its minimum sampling rate in Hertz,
81
- # among other things.
89
+ # among other things. The ordering of the list of models can change from call to
90
+ # call; do not rely on an alphabetized or static list of models.
82
91
  #
83
92
  # **See also:** [Languages and
84
93
  # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
@@ -135,7 +144,7 @@ module IBMWatson
135
144
  #########################
136
145
 
137
146
  ##
138
- # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
147
+ # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
139
148
  # Recognize audio.
140
149
  # Sends audio and returns transcription results for a recognition request. You can
141
150
  # pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
@@ -223,7 +232,7 @@ module IBMWatson
223
232
  #
224
233
  # **See also:** [Making a multipart HTTP
225
234
  # request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi).
226
- # @param audio [String] The audio to transcribe.
235
+ # @param audio [File] The audio to transcribe.
227
236
  # @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
228
237
  # audio format, see **Audio formats (content types)** in the method description.
229
238
  # @param model [String] The identifier of the model that is to be used for the recognition request. See
@@ -277,8 +286,14 @@ module IBMWatson
277
286
  # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
278
287
  # one or more string tokens. Keywords are spotted only in the final results, not in
279
288
  # interim hypotheses. If you specify any keywords, you must also specify a keywords
280
- # threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
281
- # an empty array if you do not need to spot keywords. See [Keyword
289
+ # threshold. Omit the parameter or specify an empty array if you do not need to spot
290
+ # keywords.
291
+ #
292
+ # You can spot a maximum of 1000 keywords with a single request. A single keyword
293
+ # can have a maximum length of 1024 characters, though the maximum effective length
294
+ # for double-byte languages might be shorter. Keywords are case-insensitive.
295
+ #
296
+ # See [Keyword
282
297
  # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
283
298
  # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
284
299
  # considered to match a keyword if its confidence is greater than or equal to the
@@ -323,11 +338,9 @@ module IBMWatson
323
338
  # parameter to be `true`, regardless of whether you specify `false` for the
324
339
  # parameter.
325
340
  #
326
- # **Note:** Applies to US English, Japanese, and Spanish (both broadband and
327
- # narrowband models) and UK English (narrowband model) transcription only. To
328
- # determine whether a language model supports speaker labels, you can also use the
329
- # **Get a model** method and check that the attribute `speaker_labels` is set to
330
- # `true`.
341
+ # **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
342
+ # Spanish (both broadband and narrowband models) and UK English (narrowband model)
343
+ # transcription only.
331
344
  #
332
345
  # See [Speaker
333
346
  # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -388,8 +401,33 @@ module IBMWatson
388
401
  #
389
402
  # See [Split transcript at phrase
390
403
  # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
404
+ # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
405
+ # the parameter to suppress word insertions from music, coughing, and other
406
+ # non-speech events. The service biases the audio it passes for speech recognition
407
+ # by evaluating the input audio against prior models of speech and non-speech
408
+ # activity.
409
+ #
410
+ # Specify a value between 0.0 and 1.0:
411
+ # * 0.0 suppresses all audio (no speech is transcribed).
412
+ # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
413
+ # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
414
+ #
415
+ # The values increase on a monotonic curve. See [Speech Activity
416
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
417
+ # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
418
+ # to prevent it from being transcribed as speech. Use the parameter to suppress side
419
+ # conversations or background noise.
420
+ #
421
+ # Specify a value in the range of 0.0 to 1.0:
422
+ # * 0.0 (the default) provides no suppression (background audio suppression is
423
+ # disabled).
424
+ # * 0.5 provides a reasonable level of audio suppression for general usage.
425
+ # * 1.0 suppresses all audio (no audio is transcribed).
426
+ #
427
+ # The values increase on a monotonic curve. See [Speech Activity
428
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
391
429
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
392
- def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
430
+ def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
393
431
  raise ArgumentError.new("audio must be provided") if audio.nil?
394
432
 
395
433
  headers = {
@@ -420,7 +458,9 @@ module IBMWatson
420
458
  "redaction" => redaction,
421
459
  "audio_metrics" => audio_metrics,
422
460
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
423
- "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
461
+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
462
+ "speech_detector_sensitivity" => speech_detector_sensitivity,
463
+ "background_audio_suppression" => background_audio_suppression
424
464
  }
425
465
 
426
466
  data = audio
@@ -439,7 +479,7 @@ module IBMWatson
439
479
  end
440
480
 
441
481
  ##
442
- # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
482
+ # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
443
483
  # Sends audio for speech recognition using web sockets.
444
484
  # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
445
485
  # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
@@ -449,7 +489,7 @@ module IBMWatson
449
489
  # @param customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
450
490
  # @param acoustic_customization_id [String] The GUID of a custom acoustic model that is to be used with the request. The base model of the specified custom acoustic model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom acoustic model is used.
451
491
  # @param language_customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
452
- # @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://console.bluemix.net/docs/services/speech-to-text/input.html#version).
492
+ # @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
453
493
  # @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in submitted audio, the connection is closed with a 400 error. Useful for stopping audio submission from a live microphone when a user simply walks away. Use `-1` for infinity.
454
494
  # @param interim_results [Boolean] Send back non-final previews of each "sentence" as it is being processed. These results are ignored in text mode.
455
495
  # @param keywords [Array<String>] Array of keyword strings to spot in the audio. Each keyword string can include one or more tokens. Keywords are spotted only in the final hypothesis, not in interim results. If you specify any keywords, you must also specify a keywords threshold. Omit the parameter or specify an empty array if you do not need to spot keywords.
@@ -460,13 +500,13 @@ module IBMWatson
460
500
  # @param timestamps [Boolean] If `true`, time alignment for each word is returned.
461
501
  # @param profanity_filter [Boolean] If `true` (the default), filters profanity from all output except for keyword results by replacing inappropriate words with a series of asterisks. Set the parameter to `false` to return results with no censoring. Applies to US English transcription only.
462
502
  # @param smart_formatting [Boolean] If `true`, converts dates, times, series of digits and numbers, phone numbers, currency values, and Internet addresses into more readable, conventional representations in the final transcript of a recognition request. If `false` (the default), no formatting is performed. Applies to US English transcription only.
463
- # @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://console.bluemix.net/docs/services/speech-to-text/output.html#speaker_labels).
503
+ # @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
464
504
  # @param grammar_name [String] The name of a grammar that is to be used with the recognition request. If you
465
505
  # specify a grammar, you must also use the `language_customization_id` parameter to
466
506
  # specify the name of the custom language model for which the grammar is defined.
467
507
  # The service recognizes only strings that are recognized by the specified grammar;
468
508
  # it does not recognize other custom words from the model's words resource. See
469
- # [Grammars](https://cloud.ibm.com/docs/services/speech-to-text/output.html).
509
 + # [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output).
470
510
  # @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
471
511
  # feature redacts any number that has three or more consecutive digits by replacing
472
512
  # each digit with an `X` character. It is intended to redact sensitive numeric data,
@@ -481,7 +521,7 @@ module IBMWatson
481
521
  # **Note:** Applies to US English, Japanese, and Korean transcription only.
482
522
  #
483
523
  # See [Numeric
484
- # redaction](https://cloud.ibm.com/docs/services/speech-to-text/output.html#redaction).
524
+ # redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
485
525
  #
486
526
  # @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
487
527
  # input audio. The service returns processing metrics at the interval specified by
@@ -503,7 +543,7 @@ module IBMWatson
503
543
  # @return [WebSocketClient] Returns a new WebSocketClient object
504
544
  #
505
545
  # See [Audio
506
- # metrics](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
546
+ # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
507
547
  # @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
508
548
  # splits a transcript into multiple final results. If the service detects pauses or
509
549
  # extended silence before it reaches the end of the audio stream, its response can
@@ -520,7 +560,7 @@ module IBMWatson
520
560
  # Chinese is 0.6 seconds.
521
561
  #
522
562
  # See [End of phrase silence
523
- # time](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#silence_time).
563
+ # time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
524
564
  # @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
525
565
  # based on semantic features of the input, for example, at the conclusion of
526
566
  # meaningful phrases such as sentences. The service bases its understanding of
@@ -530,7 +570,33 @@ module IBMWatson
530
570
  # interval.
531
571
  #
532
572
  # See [Split transcript at phrase
533
- # end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
573
+ # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
574
+ # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
575
+ # the parameter to suppress word insertions from music, coughing, and other
576
+ # non-speech events. The service biases the audio it passes for speech recognition
577
+ # by evaluating the input audio against prior models of speech and non-speech
578
+ # activity.
579
+ #
580
+ # Specify a value between 0.0 and 1.0:
581
+ # * 0.0 suppresses all audio (no speech is transcribed).
582
+ # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
583
+ # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
584
+ #
585
+ # The values increase on a monotonic curve. See [Speech Activity
586
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
587
+ # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
588
+ # to prevent it from being transcribed as speech. Use the parameter to suppress side
589
+ # conversations or background noise.
590
+ #
591
+ # Specify a value in the range of 0.0 to 1.0:
592
+ # * 0.0 (the default) provides no suppression (background audio suppression is
593
+ # disabled).
594
+ # * 0.5 provides a reasonable level of audio suppression for general usage.
595
+ # * 1.0 suppresses all audio (no audio is transcribed).
596
+ #
597
+ # The values increase on a monotonic curve. See [Speech Activity
598
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
599
 + # @return [WebSocketClient] Returns a new WebSocketClient object.
534
600
  def recognize_using_websocket(
535
601
  content_type: nil,
536
602
  recognize_callback:,
@@ -559,7 +625,9 @@ module IBMWatson
559
625
  processing_metrics_interval: nil,
560
626
  audio_metrics: nil,
561
627
  end_of_phrase_silence_time: nil,
562
- split_transcript_at_phrase_end: nil
628
+ split_transcript_at_phrase_end: nil,
629
+ speech_detector_sensitivity: nil,
630
+ background_audio_suppression: nil
563
631
  )
564
632
  raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
565
633
  raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
@@ -568,6 +636,7 @@ module IBMWatson
568
636
  require_relative("./websocket/speech_to_text_websocket_listener.rb")
569
637
  headers = {}
570
638
  headers = conn.default_options.headers.to_hash unless conn.default_options.headers.to_hash.empty?
639
+ @authenticator.authenticate(headers)
571
640
  service_url = @service_url.gsub("https:", "wss:")
572
641
  params = {
573
642
  "model" => model,
@@ -598,7 +667,9 @@ module IBMWatson
598
667
  "processing_metrics_interval" => processing_metrics_interval,
599
668
  "audio_metrics" => audio_metrics,
600
669
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
601
- "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
670
+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
671
+ "speech_detector_sensitivity" => speech_detector_sensitivity,
672
+ "background_audio_suppression" => background_audio_suppression
602
673
  }
603
674
  options.delete_if { |_, v| v.nil? }
604
675
  WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
@@ -611,9 +682,9 @@ module IBMWatson
611
682
  # @!method register_callback(callback_url:, user_secret: nil)
612
683
  # Register a callback.
613
684
  # Registers a callback URL with the service for use with subsequent asynchronous
614
- # recognition requests. The service attempts to register, or white-list, the
615
- # callback URL if it is not already registered by sending a `GET` request to the
616
- # callback URL. The service passes a random alphanumeric challenge string via the
685
+ # recognition requests. The service attempts to register, or allowlist, the callback
686
+ # URL if it is not already registered by sending a `GET` request to the callback
687
+ # URL. The service passes a random alphanumeric challenge string via the
617
688
  # `challenge_string` parameter of the request. The request includes an `Accept`
618
689
  # header that specifies `text/plain` as the required response type.
619
690
  #
@@ -625,9 +696,9 @@ module IBMWatson
625
696
  #
626
697
  # The service sends only a single `GET` request to the callback URL. If the service
627
698
  # does not receive a reply with a response code of 200 and a body that echoes the
628
- # challenge string sent by the service within five seconds, it does not white-list
699
+ # challenge string sent by the service within five seconds, it does not allowlist
629
700
  # the URL; it instead sends status code 400 in response to the **Register a
630
- # callback** request. If the requested callback URL is already white-listed, the
701
+ # callback** request. If the requested callback URL is already allowlisted, the
631
702
  # service responds to the initial registration request with response code 200.
632
703
  #
633
704
  # If you specify a user secret with the request, the service uses it as a key to
@@ -645,7 +716,7 @@ module IBMWatson
645
716
  # **See also:** [Registering a callback
646
717
  # URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#register).
647
718
  # @param callback_url [String] An HTTP or HTTPS URL to which callback notifications are to be sent. To be
648
- # white-listed, the URL must successfully echo the challenge string during URL
719
+ # allowlisted, the URL must successfully echo the challenge string during URL
649
720
  # verification. During verification, the client can also check the signature that
650
721
  # the service sends in the `X-Callback-Signature` header to verify the origin of the
651
722
  # request.
@@ -683,7 +754,7 @@ module IBMWatson
683
754
  ##
684
755
  # @!method unregister_callback(callback_url:)
685
756
  # Unregister a callback.
686
- # Unregisters a callback URL that was previously white-listed with a **Register a
757
+ # Unregisters a callback URL that was previously allowlisted with a **Register a
687
758
  # callback** request for use with the asynchronous interface. Once unregistered, the
688
759
  # URL can no longer be used with asynchronous recognition requests.
689
760
  #
@@ -716,7 +787,7 @@ module IBMWatson
716
787
  end
717
788
 
718
789
  ##
719
- # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
790
+ # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
720
791
  # Create a job.
721
792
  # Creates a job for a new asynchronous recognition request. The job is owned by the
722
793
  # instance of the service whose credentials are used to create it. How you learn the
@@ -814,14 +885,14 @@ module IBMWatson
814
885
  #
815
886
  # **See also:** [Audio
816
887
  # formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
817
- # @param audio [String] The audio to transcribe.
888
+ # @param audio [File] The audio to transcribe.
818
889
  # @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
819
890
  # audio format, see **Audio formats (content types)** in the method description.
820
891
  # @param model [String] The identifier of the model that is to be used for the recognition request. See
821
892
  # [Languages and
822
893
  # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
823
894
  # @param callback_url [String] A URL to which callback notifications are to be sent. The URL must already be
824
- # successfully white-listed by using the **Register a callback** method. You can
895
+ # successfully allowlisted by using the **Register a callback** method. You can
825
896
  # include the same callback URL with any number of job creation requests. Omit the
826
897
  # parameter to poll the service for job completion and results.
827
898
  #
@@ -902,8 +973,14 @@ module IBMWatson
902
973
  # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
903
974
  # one or more string tokens. Keywords are spotted only in the final results, not in
904
975
  # interim hypotheses. If you specify any keywords, you must also specify a keywords
905
- # threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
906
- # an empty array if you do not need to spot keywords. See [Keyword
976
+ # threshold. Omit the parameter or specify an empty array if you do not need to spot
977
+ # keywords.
978
+ #
979
+ # You can spot a maximum of 1000 keywords with a single request. A single keyword
980
+ # can have a maximum length of 1024 characters, though the maximum effective length
981
+ # for double-byte languages might be shorter. Keywords are case-insensitive.
982
+ #
983
+ # See [Keyword
907
984
  # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
908
985
  # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
909
986
  # considered to match a keyword if its confidence is greater than or equal to the
@@ -948,11 +1025,9 @@ module IBMWatson
948
1025
  # parameter to be `true`, regardless of whether you specify `false` for the
949
1026
  # parameter.
950
1027
  #
951
- # **Note:** Applies to US English, Japanese, and Spanish (both broadband and
952
- # narrowband models) and UK English (narrowband model) transcription only. To
953
- # determine whether a language model supports speaker labels, you can also use the
954
- # **Get a model** method and check that the attribute `speaker_labels` is set to
955
- # `true`.
1028
+ # **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
1029
+ # Spanish (both broadband and narrowband models) and UK English (narrowband model)
1030
+ # transcription only.
956
1031
  #
957
1032
  # See [Speaker
958
1033
  # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -1035,8 +1110,33 @@ module IBMWatson
1035
1110
  #
1036
1111
  # See [Split transcript at phrase
1037
1112
  # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
1113
+ # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
1114
+ # the parameter to suppress word insertions from music, coughing, and other
1115
+ # non-speech events. The service biases the audio it passes for speech recognition
1116
+ # by evaluating the input audio against prior models of speech and non-speech
1117
+ # activity.
1118
+ #
1119
+ # Specify a value between 0.0 and 1.0:
1120
+ # * 0.0 suppresses all audio (no speech is transcribed).
1121
+ # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
1122
+ # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
1123
+ #
1124
+ # The values increase on a monotonic curve. See [Speech Activity
1125
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
1126
+ # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
1127
+ # to prevent it from being transcribed as speech. Use the parameter to suppress side
1128
+ # conversations or background noise.
1129
+ #
1130
+ # Specify a value in the range of 0.0 to 1.0:
1131
+ # * 0.0 (the default) provides no suppression (background audio suppression is
1132
+ # disabled).
1133
+ # * 0.5 provides a reasonable level of audio suppression for general usage.
1134
+ # * 1.0 suppresses all audio (no audio is transcribed).
1135
+ #
1136
+ # The values increase on a monotonic curve. See [Speech Activity
1137
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
1038
1138
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1039
- def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
1139
+ def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
1040
1140
  raise ArgumentError.new("audio must be provided") if audio.nil?
1041
1141
 
1042
1142
  headers = {
@@ -1073,7 +1173,9 @@ module IBMWatson
1073
1173
  "processing_metrics_interval" => processing_metrics_interval,
1074
1174
  "audio_metrics" => audio_metrics,
1075
1175
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
1076
- "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
1176
+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
1177
+ "speech_detector_sensitivity" => speech_detector_sensitivity,
1178
+ "background_audio_suppression" => background_audio_suppression
1077
1179
  }
1078
1180
 
1079
1181
  data = audio
@@ -1290,8 +1392,12 @@ module IBMWatson
1290
1392
  # **See also:** [Listing custom language
1291
1393
  # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
1292
1394
  # @param language [String] The identifier of the language for which custom language or custom acoustic models
1293
- # are to be returned (for example, `en-US`). Omit the parameter to see all custom
1294
- # language or custom acoustic models that are owned by the requesting credentials.
1395
+ # are to be returned. Omit the parameter to see all custom language or custom
1396
+ # acoustic models that are owned by the requesting credentials.
1397
+ #
1398
+ # To determine the languages for which customization is available, see [Language
1399
+ # support for
1400
+ # customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
1295
1401
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1296
1402
  def list_language_models(language: nil)
1297
1403
  headers = {
@@ -1599,18 +1705,20 @@ module IBMWatson
1599
1705
  #
1600
1706
  # The call returns an HTTP 201 response code if the corpus is valid. The service
1601
1707
  # then asynchronously processes the contents of the corpus and automatically
1602
- # extracts new words that it finds. This can take on the order of a minute or two to
1603
- # complete depending on the total number of words and the number of new words in the
1604
- # corpus, as well as the current load on the service. You cannot submit requests to
1605
- # add additional resources to the custom model or to train the model until the
1708
+ # extracts new words that it finds. This operation can take on the order of minutes
1709
+ # to complete depending on the total number of words and the number of new words in
1710
+ # the corpus, as well as the current load on the service. You cannot submit requests
1711
+ # to add additional resources to the custom model or to train the model until the
1606
1712
  # service's analysis of the corpus for the current request completes. Use the **List
1607
1713
  # a corpus** method to check the status of the analysis.
1608
1714
  #
1609
1715
  # The service auto-populates the model's words resource with words from the corpus
1610
- # that are not found in its base vocabulary. These are referred to as
1611
- # out-of-vocabulary (OOV) words. You can use the **List custom words** method to
1612
- # examine the words resource. You can use other words method to eliminate typos and
1613
- # modify how words are pronounced as needed.
1716
+ # that are not found in its base vocabulary. These words are referred to as
1717
+ # out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
1718
+ # resource to ensure that each OOV word's definition is complete and valid. You can
1719
+ # use the **List custom words** method to examine the words resource. You can use
1720
 + # other words methods to eliminate typos and modify how words are pronounced as
1721
+ # needed.
1614
1722
  #
1615
1723
  # To add a corpus file that has the same name as an existing corpus, set the
1616
1724
  # `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
@@ -1627,10 +1735,12 @@ module IBMWatson
1627
1735
  # directly.
1628
1736
  #
1629
1737
  # **See also:**
1738
+ # * [Add a corpus to the custom language
1739
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
1630
1740
  # * [Working with
1631
1741
  # corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
1632
- # * [Add a corpus to the custom language
1633
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus).
1742
+ # * [Validating a words
1743
+ # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
1634
1744
  # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
1635
1745
  # the request. You must make the request with credentials for the instance of the
1636
1746
  # service that owns the custom model.
@@ -1859,7 +1969,10 @@ module IBMWatson
1859
1969
  # the parameter for words that are difficult to pronounce, foreign words, acronyms,
1860
1970
  # and so on. For example, you might specify that the word `IEEE` can sound like `i
1861
1971
  # triple e`. You can specify a maximum of five sounds-like pronunciations for a
1862
- # word.
1972
+ # word. If you omit the `sounds_like` field, the service attempts to set the field
1973
+ # to its pronunciation of the word. It cannot generate a pronunciation for all
1974
+ # words, so you must review the word's definition to ensure that it is complete and
1975
+ # valid.
1863
1976
  # * The `display_as` field provides a different way of spelling the word in a
1864
1977
  # transcript. Use the parameter when you want the word to appear different from its
1865
1978
  # usual representation or from its spelling in training data. For example, you might
@@ -1889,10 +2002,12 @@ module IBMWatson
1889
2002
  #
1890
2003
  #
1891
2004
  # **See also:**
2005
+ # * [Add words to the custom language
2006
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
1892
2007
  # * [Working with custom
1893
2008
  # words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
1894
- # * [Add words to the custom language
1895
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords).
2009
+ # * [Validating a words
2010
+ # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
1896
2011
  # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
1897
2012
  # the request. You must make the request with credentials for the instance of the
1898
2013
  # service that owns the custom model.
@@ -1948,7 +2063,10 @@ module IBMWatson
1948
2063
  # the parameter for words that are difficult to pronounce, foreign words, acronyms,
1949
2064
  # and so on. For example, you might specify that the word `IEEE` can sound like `i
1950
2065
  # triple e`. You can specify a maximum of five sounds-like pronunciations for a
1951
- # word.
2066
+ # word. If you omit the `sounds_like` field, the service attempts to set the field
2067
+ # to its pronunciation of the word. It cannot generate a pronunciation for all
2068
+ # words, so you must review the word's definition to ensure that it is complete and
2069
+ # valid.
1952
2070
  # * The `display_as` field provides a different way of spelling the word in a
1953
2071
  # transcript. Use the parameter when you want the word to appear different from its
1954
2072
  # usual representation or from its spelling in training data. For example, you might
@@ -1960,10 +2078,12 @@ module IBMWatson
1960
2078
  # the **List a custom word** method to review the word that you add.
1961
2079
  #
1962
2080
  # **See also:**
2081
+ # * [Add words to the custom language
2082
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
1963
2083
  # * [Working with custom
1964
2084
  # words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
1965
- # * [Add words to the custom language
1966
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords).
2085
+ # * [Validating a words
2086
+ # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
1967
2087
  # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
1968
2088
  # the request. You must make the request with credentials for the instance of the
1969
2089
  # service that owns the custom model.
@@ -2147,12 +2267,12 @@ module IBMWatson
2147
2267
  #
2148
2268
  # The call returns an HTTP 201 response code if the grammar is valid. The service
2149
2269
  # then asynchronously processes the contents of the grammar and automatically
2150
- # extracts new words that it finds. This can take a few seconds to complete
2151
- # depending on the size and complexity of the grammar, as well as the current load
2152
- # on the service. You cannot submit requests to add additional resources to the
2153
- # custom model or to train the model until the service's analysis of the grammar for
2154
- # the current request completes. Use the **Get a grammar** method to check the
2155
- # status of the analysis.
2270
+ # extracts new words that it finds. This operation can take a few seconds or minutes
2271
+ # to complete depending on the size and complexity of the grammar, as well as the
2272
+ # current load on the service. You cannot submit requests to add additional
2273
+ # resources to the custom model or to train the model until the service's analysis
2274
+ # of the grammar for the current request completes. Use the **Get a grammar** method
2275
+ # to check the status of the analysis.
2156
2276
  #
2157
2277
  # The service populates the model's words resource with any word that is recognized
2158
2278
  # by the grammar that is not found in the model's base vocabulary. These are
@@ -2396,8 +2516,12 @@ module IBMWatson
2396
2516
  # **See also:** [Listing custom acoustic
2397
2517
  # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
2398
2518
  # @param language [String] The identifier of the language for which custom language or custom acoustic models
2399
- # are to be returned (for example, `en-US`). Omit the parameter to see all custom
2400
- # language or custom acoustic models that are owned by the requesting credentials.
2519
+ # are to be returned. Omit the parameter to see all custom language or custom
2520
+ # acoustic models that are owned by the requesting credentials.
2521
+ #
2522
+ # To determine the languages for which customization is available, see [Language
2523
+ # support for
2524
+ # customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
2401
2525
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
2402
2526
  def list_acoustic_models(language: nil)
2403
2527
  headers = {
@@ -2495,14 +2619,14 @@ module IBMWatson
2495
2619
  # it. You must use credentials for the instance of the service that owns a model to
2496
2620
  # train it.
2497
2621
  #
2498
- # The training method is asynchronous. It can take on the order of minutes or hours
2499
- # to complete depending on the total amount of audio data on which the custom
2500
- # acoustic model is being trained and the current load on the service. Typically,
2501
- # training a custom acoustic model takes approximately two to four times the length
2502
- # of its audio data. The range of time depends on the model being trained and the
2503
- # nature of the audio, such as whether the audio is clean or noisy. The method
2504
- # returns an HTTP 200 response code to indicate that the training process has begun.
2505
- #
2622
+ # The training method is asynchronous. Training time depends on the cumulative
2623
+ # amount of audio data that the custom acoustic model contains and the current load
2624
+ # on the service. When you train or retrain a model, the service uses all of the
2625
+ # model's audio data in the training. Training a custom acoustic model takes
2626
+ # approximately as long as the length of its cumulative audio data. For example, it
2627
+ # takes approximately 2 hours to train a model that contains a total of 2 hours of
2628
+ # audio. The method returns an HTTP 200 response code to indicate that the training
2629
+ # process has begun.
2506
2630
  #
2507
2631
  # You can monitor the status of the training by using the **Get a custom acoustic
2508
2632
  # model** method to poll the model's status. Use a loop to check the status once a
@@ -2518,8 +2642,9 @@ module IBMWatson
2518
2642
  # Train with a custom language model if you have verbatim transcriptions of the
2519
2643
  # audio files that you have added to the custom model or you have either corpora
2520
2644
  # (text files) or a list of words that are relevant to the contents of the audio
2521
- # files. Both of the custom models must be based on the same version of the same
2522
- # base model for training to succeed.
2645
+ # files. For training to succeed, both of the custom models must be based on the
2646
+ # same version of the same base model, and the custom language model must be fully
2647
+ # trained and available.
2523
2648
  #
2524
2649
  # **See also:**
2525
2650
  # * [Train the custom acoustic
@@ -2535,6 +2660,9 @@ module IBMWatson
2535
2660
  # another training request or a request to add audio resources to the model.
2536
2661
  # * The custom model contains less than 10 minutes or more than 200 hours of audio
2537
2662
  # data.
2663
+ # * You passed a custom language model with the `custom_language_model_id` query
2664
+ # parameter that is not in the available state. A custom language model must be
2665
+ # fully trained and available to be used to train a custom acoustic model.
2538
2666
  # * You passed an incompatible custom language model with the
2539
2667
  # `custom_language_model_id` query parameter. Both custom models must be based on
2540
2668
  # the same version of the same base model.
@@ -2550,8 +2678,8 @@ module IBMWatson
2550
2678
  # been trained with verbatim transcriptions of the audio resources or that contains
2551
2679
  # words that are relevant to the contents of the audio resources. The custom
2552
2680
  # language model must be based on the same version of the same base model as the
2553
- # custom acoustic model. The credentials specified with the request must own both
2554
- # custom models.
2681
+ # custom acoustic model, and the custom language model must be fully trained and
2682
+ # available. The credentials specified with the request must own both custom models.
2555
2683
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
2556
2684
  def train_acoustic_model(customization_id:, custom_language_model_id: nil)
2557
2685
  raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
@@ -2649,8 +2777,9 @@ module IBMWatson
2649
2777
  # service that owns the custom model.
2650
2778
  # @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
2651
2779
  # customization ID (GUID) of that custom language model. The custom language model
2652
- # must be upgraded before the custom acoustic model can be upgraded. The credentials
2653
- # specified with the request must own both custom models.
2780
+ # must be upgraded before the custom acoustic model can be upgraded. The custom
2781
+ # language model must be fully trained and available. The credentials specified with
2782
+ # the request must own both custom models.
2654
2783
  # @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
2655
2784
  # has been modified since it was last trained. Use this parameter only to force the
2656
2785
  # upgrade of a custom acoustic model that is trained with a custom language model,
@@ -2745,14 +2874,14 @@ module IBMWatson
2745
2874
  # same name as an existing audio resource, set the `allow_overwrite` parameter to
2746
2875
  # `true`; otherwise, the request fails.
2747
2876
  #
2748
- # The method is asynchronous. It can take several seconds to complete depending on
2749
- # the duration of the audio and, in the case of an archive file, the total number of
2750
- # audio files being processed. The service returns a 201 response code if the audio
2751
- # is valid. It then asynchronously analyzes the contents of the audio file or files
2752
- # and automatically extracts information about the audio such as its length,
2753
- # sampling rate, and encoding. You cannot submit requests to train or upgrade the
2754
- # model until the service's analysis of all audio resources for current requests
2755
- # completes.
2877
+ # The method is asynchronous. It can take several seconds or minutes to complete
2878
+ # depending on the duration of the audio and, in the case of an archive file, the
2879
+ # total number of audio files being processed. The service returns a 201 response
2880
+ # code if the audio is valid. It then asynchronously analyzes the contents of the
2881
+ # audio file or files and automatically extracts information about the audio such as
2882
+ # its length, sampling rate, and encoding. You cannot submit requests to train or
2883
+ # upgrade the model until the service's analysis of all audio resources for current
2884
+ # requests completes.
2756
2885
  #
2757
2886
  # To determine the status of the service's analysis of the audio, use the **Get an
2758
2887
  # audio resource** method to poll the status of the audio. The method accepts the
@@ -2841,7 +2970,7 @@ module IBMWatson
2841
2970
  # used, their use is strongly discouraged.)
2842
2971
  # * Do not use the name of an audio resource that has already been added to the
2843
2972
  # custom model.
2844
- # @param audio_resource [String] The audio resource that is to be added to the custom acoustic model, an individual
2973
+ # @param audio_resource [File] The audio resource that is to be added to the custom acoustic model, an individual
2845
2974
  # audio file or an archive file.
2846
2975
  #
2847
2976
  # With the `curl` command, use the `--data-binary` option to upload the file for the
@@ -3009,10 +3138,15 @@ module IBMWatson
3009
3138
  # deletes all data for the customer ID, regardless of the method by which the
3010
3139
  # information was added. The method has no effect if no data is associated with the
3011
3140
  # customer ID. You must issue the request with credentials for the same instance of
3012
- # the service that was used to associate the customer ID with the data.
3013
- #
3014
- # You associate a customer ID with data by passing the `X-Watson-Metadata` header
3015
- # with a request that passes the data.
3141
+ # the service that was used to associate the customer ID with the data. You
3142
+ # associate a customer ID with data by passing the `X-Watson-Metadata` header with a
3143
+ # request that passes the data.
3144
+ #
3145
+ # **Note:** If you delete an instance of the service from the service console, all
3146
+ # data associated with that service instance is automatically deleted. This includes
3147
+ # all custom language models, corpora, grammars, and words; all custom acoustic
3148
+ # models and audio resources; all registered endpoints for the asynchronous HTTP
3149
+ # interface; and all data related to speech recognition requests.
3016
3150
  #
3017
3151
  # **See also:** [Information
3018
3152
  # security](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-information-security#information-security).