ibm_watson 1.6.0 → 2.1.0

@@ -13,12 +13,28 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
- # The IBM Watson&trade; Personality Insights service enables applications to derive
- # insights from social media, enterprise data, or other digital communications. The
- # service uses linguistic analytics to infer individuals' intrinsic personality
- # characteristics, including Big Five, Needs, and Values, from digital communications such
- # as email, text messages, tweets, and forum posts.
+ #
+ # IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
+ #
+ # IBM Watson&trade; Personality Insights is discontinued. Existing instances are
+ # supported until 1 December 2021, but as of 1 December 2020, you cannot create new
+ # instances. Any instance that exists on 1 December 2021 will be deleted.<br/><br/>No
+ # direct replacement exists for Personality Insights. However, you can consider using [IBM
+ # Watson&trade; Natural Language
+ # Understanding](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-about)
+ # on IBM Cloud&reg; as part of a replacement analytic workflow for your Personality
+ # Insights use cases. You can use Natural Language Understanding to extract data and
+ # insights from text, such as keywords, categories, sentiment, emotion, and syntax. For
+ # more information about the personality models in Personality Insights, see [The science
+ # behind the
+ # service](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-science).
+ # {: deprecated}
+ #
+ # The IBM Watson Personality Insights service enables applications to derive insights from
+ # social media, enterprise data, or other digital communications. The service uses
+ # linguistic analytics to infer individuals' intrinsic personality characteristics,
+ # including Big Five, Needs, and Values, from digital communications such as email, text
+ # messages, tweets, and forum posts.
  #
  # The service can automatically infer, from potentially noisy social media, portraits of
  # individuals that reflect their personality characteristics. The service can infer
@@ -41,7 +57,6 @@ require "json"
  require "ibm_cloud_sdk_core"
  require_relative "./common.rb"

- # Module for the Watson APIs
  module IBMWatson
  ##
  # The Personality Insights V3 service.
@@ -49,33 +64,27 @@ module IBMWatson
  include Concurrent::Async
  DEFAULT_SERVICE_NAME = "personality_insights"
  DEFAULT_SERVICE_URL = "https://api.us-south.personality-insights.watson.cloud.ibm.com"
+ attr_accessor :version
  ##
  # @!method initialize(args)
  # Construct a new client for the Personality Insights service.
  #
  # @param args [Hash] The args to initialize with
- # @option args version [String] The API version date to use with the service, in
- # "YYYY-MM-DD" format. Whenever the API is changed in a backwards
- # incompatible way, a new minor version of the API is released.
- # The service uses the API version for the date you specify, or
- # the most recent version before that date. Note that you should
- # not programmatically specify the current date at runtime, in
- # case the API has been updated since your application's release.
- # Instead, specify a version date that is compatible with your
- # application, and don't change it until your application is
- # ready for a later version.
+ # @option args version [String] Release date of the version of the API you want to use. Specify dates in
+ # YYYY-MM-DD format. The current version is `2017-10-13`.
  # @option args service_url [String] The base service URL to use when contacting the service.
  # The base service_url may differ between IBM Cloud regions.
  # @option args authenticator [Object] The Authenticator instance to be configured for this service.
  # @option args service_name [String] The name of the service to configure. Will be used as the key to load
  # any external configuration, if applicable.
  def initialize(args = {})
+ warn "On 1 December 2021, Personality Insights will no longer be available. For more information, see https://github.com/watson-developer-cloud/ruby-sdk/tree/master#personality-insights-deprecation."
  @__async_initialized__ = false
  defaults = {}
- defaults[:version] = nil
  defaults[:service_url] = DEFAULT_SERVICE_URL
  defaults[:service_name] = DEFAULT_SERVICE_NAME
  defaults[:authenticator] = nil
+ defaults[:version] = nil
  user_service_url = args[:service_url] unless args[:service_url].nil?
  args = defaults.merge(args)
  @version = args[:version]
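In 2.1.0 the `version` keyword defaults to nil and is validated per request rather than at construction. A minimal construction sketch, following the SDK README pattern (the `{apikey}` value is a placeholder):

    require "ibm_watson/authenticators"
    require "ibm_watson/personality_insights_v3"
    include IBMWatson

    # IAM API-key authentication, as in the SDK README.
    authenticator = Authenticators::IamAuthenticator.new(apikey: "{apikey}")

    # `version` now defaults to nil and is checked per request, so pass it here;
    # omitting it makes each request method raise "version must be provided".
    personality_insights = PersonalityInsightsV3.new(
      version: "2017-10-13",
      authenticator: authenticator
    )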
@@ -170,6 +179,8 @@ module IBMWatson
  # default, no consumption preferences are returned.
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
  def profile(content:, accept:, content_type: nil, content_language: nil, accept_language: nil, raw_scores: nil, csv_headers: nil, consumption_preferences: nil)
+ raise ArgumentError.new("version must be provided") if version.nil?
+
  raise ArgumentError.new("content must be provided") if content.nil?

  raise ArgumentError.new("accept must be provided") if accept.nil?
@@ -13,13 +13,22 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
+ #
+ # IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
+ #
  # The IBM Watson&trade; Speech to Text service provides APIs that use IBM's
  # speech-recognition capabilities to produce transcripts of spoken audio. The service can
  # transcribe speech from various languages and audio formats. In addition to basic
  # transcription, the service can produce detailed information about many different aspects
- # of the audio. For most languages, the service supports two sampling rates, broadband and
- # narrowband. It returns all JSON response content in the UTF-8 character set.
+ # of the audio. It returns all JSON response content in the UTF-8 character set.
+ #
+ # The service supports two types of models: previous-generation models that include the
+ # terms `Broadband` and `Narrowband` in their names, and beta next-generation models that
+ # include the terms `Multimedia` and `Telephony` in their names. Broadband and multimedia
+ # models have minimum sampling rates of 16 kHz. Narrowband and telephony models have
+ # minimum sampling rates of 8 kHz. The beta next-generation models currently support fewer
+ # languages and features, but they offer high throughput and greater transcription
+ # accuracy.
  #
  # For speech recognition, the service supports synchronous and asynchronous HTTP
  # Representational State Transfer (REST) interfaces. It also supports a WebSocket
@@ -35,8 +44,9 @@
  # can recognize.
  #
  # Language model customization and acoustic model customization are generally available
- # for production use with all language models that are generally available. Grammars are
- # beta functionality for all language models that support language model customization.
+ # for production use with all previous-generation models that are generally available.
+ # Grammars are beta functionality for all previous-generation models that support language
+ # model customization. Next-generation models do not support customization at this time.

  require "concurrent"
  require "erb"
@@ -44,7 +54,6 @@ require "json"
  require "ibm_cloud_sdk_core"
  require_relative "./common.rb"

- # Module for the Watson APIs
  module IBMWatson
  ##
  # The Speech to Text V1 service.
@@ -87,8 +96,8 @@ module IBMWatson
  # among other things. The ordering of the list of models can change from call to
  # call; do not rely on an alphabetized or static list of models.
  #
- # **See also:** [Languages and
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
+ # **See also:** [Listing
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-list).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
  def list_models
  headers = {
@@ -114,10 +123,11 @@ module IBMWatson
  # with the service. The information includes the name of the model and its minimum
  # sampling rate in Hertz, among other things.
  #
- # **See also:** [Languages and
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
+ # **See also:** [Listing
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-list).
  # @param model_id [String] The identifier of the model in the form of its name from the output of the **Get a
- # model** method.
+ # model** method. (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
+ # `ar-MS_BroadbandModel` instead.).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
  def get_model(model_id:)
  raise ArgumentError.new("model_id must be provided") if model_id.nil?
@@ -142,7 +152,7 @@ module IBMWatson
  #########################

  ##
- # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
+ # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
  # Recognize audio.
  # Sends audio and returns transcription results for a recognition request. You can
  # pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
@@ -209,8 +219,40 @@ module IBMWatson
  # sampling rate of the audio is lower than the minimum required rate, the request
  # fails.
  #
- # **See also:** [Audio
- # formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
+ # **See also:** [Supported audio
+ # formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats).
+ #
+ #
+ # ### Next-generation models
+ #
+ # **Note:** The next-generation language models are beta functionality. They
+ # support a limited number of languages and features at this time. The supported
+ # languages, models, and features will increase with future releases.
+ #
+ # The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz)
+ # models for many languages. Next-generation models have higher throughput than the
+ # service's previous generation of `Broadband` and `Narrowband` models. When you use
+ # next-generation models, the service can return transcriptions more quickly and
+ # also provide noticeably better transcription accuracy.
+ #
+ # You specify a next-generation model by using the `model` query parameter, as you
+ # do a previous-generation model. Next-generation models support the same request
+ # headers as previous-generation models, but they support only the following
+ # additional query parameters:
+ # * `background_audio_suppression`
+ # * `inactivity_timeout`
+ # * `profanity_filter`
+ # * `redaction`
+ # * `smart_formatting`
+ # * `speaker_labels`
+ # * `speech_detector_sensitivity`
+ # * `timestamps`
+ #
+ # Many next-generation models also support the beta `low_latency` parameter, which
+ # is not available with previous-generation models.
+ #
+ # **See also:** [Next-generation languages and
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
  #
  #
  # ### Multipart speech recognition
@@ -230,18 +272,22 @@ module IBMWatson
  #
  # **See also:** [Making a multipart HTTP
  # request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi).
- # @param audio [String] The audio to transcribe.
+ # @param audio [File] The audio to transcribe.
  # @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
  # audio format, see **Audio formats (content types)** in the method description.
- # @param model [String] The identifier of the model that is to be used for the recognition request. See
- # [Languages and
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
+ # @param model [String] The identifier of the model that is to be used for the recognition request.
+ # (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
+ # `ar-MS_BroadbandModel` instead.) See [Languages and
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models) and
+ # [Next-generation languages and
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
  # @param language_customization_id [String] The customization ID (GUID) of a custom language model that is to be used with the
  # recognition request. The base model of the specified custom language model must
  # match the model specified with the `model` parameter. You must make the request
  # with credentials for the instance of the service that owns the custom model. By
- # default, no custom language model is used. See [Custom
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
+ # default, no custom language model is used. See [Using a custom language model for
+ # speech
+ # recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse).
  #
  #
  # **Note:** Use this parameter instead of the deprecated `customization_id`
@@ -250,14 +296,16 @@ module IBMWatson
  # recognition request. The base model of the specified custom acoustic model must
  # match the model specified with the `model` parameter. You must make the request
  # with credentials for the instance of the service that owns the custom model. By
- # default, no custom acoustic model is used. See [Custom
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
+ # default, no custom acoustic model is used. See [Using a custom acoustic model for
+ # speech
+ # recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acousticUse).
  # @param base_model_version [String] The version of the specified base model that is to be used with the recognition
  # request. Multiple versions of a base model can exist when a model is updated for
  # internal improvements. The parameter is intended primarily for use with custom
  # models that have been upgraded for a new base model. The default value depends on
- # whether the parameter is used with or without a custom model. See [Base model
- # version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
+ # whether the parameter is used with or without a custom model. See [Making speech
+ # recognition requests with upgraded custom
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade-use#custom-upgrade-use-recognition).
  # @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
  # recognition request, the customization weight tells the service how much weight to
  # give to words from the custom language model compared to those from the base model
@@ -274,8 +322,8 @@ module IBMWatson
  # custom model's domain, but it can negatively affect performance on non-domain
  # phrases.
  #
- # See [Custom
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
+ # See [Using customization
+ # weight](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse#weight).
  # @param inactivity_timeout [Fixnum] The time in seconds after which, if only silence (no speech) is detected in
  # streaming audio, the connection is closed with a 400 error. The parameter is
  # useful for stopping audio submission from a live microphone when a user simply
@@ -292,34 +340,34 @@ module IBMWatson
  # for double-byte languages might be shorter. Keywords are case-insensitive.
  #
  # See [Keyword
- # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
+ # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
  # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
  # considered to match a keyword if its confidence is greater than or equal to the
  # threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
  # you must also specify one or more keywords. The service performs no keyword
  # spotting if you omit either parameter. See [Keyword
- # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
+ # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
  # @param max_alternatives [Fixnum] The maximum number of alternative transcripts that the service is to return. By
  # default, the service returns a single transcript. If you specify a value of `0`,
  # the service uses the default value, `1`. See [Maximum
- # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#max_alternatives).
+ # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#max-alternatives).
  # @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
  # possible word alternative (also known as "Confusion Networks"). An alternative
  # word is considered if its confidence is greater than or equal to the threshold.
  # Specify a probability between 0.0 and 1.0. By default, the service computes no
  # alternative words. See [Word
- # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_alternatives).
+ # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#word-alternatives).
  # @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
  # each word. By default, the service returns no word confidence scores. See [Word
- # confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_confidence).
+ # confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-confidence).
  # @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
  # timestamps are returned. See [Word
- # timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_timestamps).
+ # timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-timestamps).
  # @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
  # results by replacing inappropriate words with a series of asterisks. Set the
  # parameter to `false` to return results with no censoring. Applies to US English
- # transcription only. See [Profanity
- # filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#profanity_filter).
+ # and Japanese transcription only. See [Profanity
+ # filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#profanity-filtering).
  # @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
  # numbers, currency values, and internet addresses into more readable, conventional
  # representations in the final transcript of a recognition request. For US English,
@@ -329,19 +377,21 @@ module IBMWatson
  # **Note:** Applies to US English, Japanese, and Spanish transcription only.
  #
  # See [Smart
- # formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#smart_formatting).
+ # formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#smart-formatting).
  # @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
  # which participants in a multi-person exchange. By default, the service returns no
  # speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
  # parameter to be `true`, regardless of whether you specify `false` for the
  # parameter.
- #
- # **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
- # Spanish (both broadband and narrowband models) and UK English (narrowband model)
- # transcription only.
- #
- # See [Speaker
- # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
+ # * For previous-generation models, can be used for US English, Australian English,
+ # German, Japanese, Korean, and Spanish (both broadband and narrowband models) and
+ # UK English (narrowband model) transcription only.
+ # * For next-generation models, can be used for English (Australian, UK, and US),
+ # German, and Spanish transcription only.
+ #
+ # Restrictions and limitations apply to the use of speaker labels for both types of
+ # models. See [Speaker
+ # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
  # @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
  # customization ID (GUID) of a custom language model that is to be used with the
  # recognition request. Do not specify both parameters with a request.
@@ -350,7 +400,8 @@ module IBMWatson
  # specify the name of the custom language model for which the grammar is defined.
  # The service recognizes only strings that are recognized by the specified grammar;
  # it does not recognize other custom words from the model's words resource. See
- # [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
+ # [Using a grammar for speech
+ # recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUse).
  # @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
  # feature redacts any number that has three or more consecutive digits by replacing
  # each digit with an `X` character. It is intended to redact sensitive numeric data,
@@ -365,13 +416,13 @@ module IBMWatson
  # **Note:** Applies to US English, Japanese, and Korean transcription only.
  #
  # See [Numeric
- # redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
+ # redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#numeric-redaction).
  # @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
  # input audio. The service returns audio metrics with the final transcription
  # results. By default, the service returns no audio metrics.
  #
  # See [Audio
- # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
+ # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio-metrics).
  # @param end_of_phrase_silence_time [Float] If `true`, specifies the duration of the pause interval at which the service
  # splits a transcript into multiple final results. If the service detects pauses or
  # extended silence before it reaches the end of the audio stream, its response can
@@ -388,7 +439,7 @@ module IBMWatson
  # Chinese is 0.6 seconds.
  #
  # See [End of phrase silence
- # time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
+ # time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#silence-time).
  # @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
  # based on semantic features of the input, for example, at the conclusion of
  # meaningful phrases such as sentences. The service bases its understanding of
@@ -398,7 +449,7 @@ module IBMWatson
  # interval.
  #
  # See [Split transcript at phrase
- # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+ # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#split-transcript).
  # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
  # the parameter to suppress word insertions from music, coughing, and other
  # non-speech events. The service biases the audio it passes for speech recognition
@@ -410,8 +461,8 @@ module IBMWatson
  # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
  # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
  #
- # The values increase on a monotonic curve. See [Speech Activity
- # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+ # The values increase on a monotonic curve. See [Speech detector
+ # sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity).
  # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
  # to prevent it from being transcribed as speech. Use the parameter to suppress side
  # conversations or background noise.
@@ -422,10 +473,27 @@ module IBMWatson
  # * 0.5 provides a reasonable level of audio suppression for general usage.
  # * 1.0 suppresses all audio (no audio is transcribed).
  #
- # The values increase on a monotonic curve. See [Speech Activity
- # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+ # The values increase on a monotonic curve. See [Background audio
+ # suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
+ # @param low_latency [Boolean] If `true` for next-generation `Multimedia` and `Telephony` models that support low
+ # latency, directs the service to produce results even more quickly than it usually
+ # does. Next-generation models produce transcription results faster than
+ # previous-generation models. The `low_latency` parameter causes the models to
+ # produce results even more quickly, though the results might be less accurate when
+ # the parameter is used.
+ #
+ # **Note:** The parameter is beta functionality. It is not available for
+ # previous-generation `Broadband` and `Narrowband` models. It is available only for
+ # some next-generation models.
+ #
+ # * For a list of next-generation models that support low latency, see [Supported
+ # language
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
+ # for next-generation models.
+ # * For more information about the `low_latency` parameter, see [Low
+ # latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
- def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
+ def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
  raise ArgumentError.new("audio must be provided") if audio.nil?

  headers = {
@@ -458,7 +526,8 @@ module IBMWatson
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
  "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
  "speech_detector_sensitivity" => speech_detector_sensitivity,
- "background_audio_suppression" => background_audio_suppression
+ "background_audio_suppression" => background_audio_suppression,
+ "low_latency" => low_latency
  }

  data = audio
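The new `low_latency` flag simply joins the query-parameter hash above. A sketch of a synchronous request against a next-generation model (the file name is illustrative; `en-US_Telephony` is one of the documented next-generation models):

    File.open("call-recording.mp3") do |audio_file|
      response = speech_to_text.recognize(
        audio: audio_file,
        content_type: "audio/mp3",
        model: "en-US_Telephony",  # next-generation telephony model
        low_latency: true,         # beta; only some next-generation models
        smart_formatting: true
      )
      response.result["results"].each do |r|
        puts r["alternatives"].first["transcript"]
      end
    end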
@@ -477,7 +546,7 @@ module IBMWatson
  end

  ##
- # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
+ # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
  # Sends audio for speech recognition using web sockets.
  # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
  # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
@@ -594,6 +663,23 @@ module IBMWatson
  #
  # The values increase on a monotonic curve. See [Speech Activity
  # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+ # @param low_latency [Boolean] If `true` for next-generation `Multimedia` and `Telephony` models that support low
+ # latency, directs the service to produce results even more quickly than it usually
+ # does. Next-generation models produce transcription results faster than
+ # previous-generation models. The `low_latency` parameter causes the models to
+ # produce results even more quickly, though the results might be less accurate when
+ # the parameter is used.
+ #
+ # **Note:** The parameter is beta functionality. It is not available for
+ # previous-generation `Broadband` and `Narrowband` models. It is available only for
+ # some next-generation models.
+ #
+ # * For a list of next-generation models that support low latency, see [Supported
+ # language
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
+ # for next-generation models.
+ # * For more information about the `low_latency` parameter, see [Low
+ # latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
  def recognize_using_websocket(
  content_type: nil,
@@ -625,7 +711,8 @@ module IBMWatson
  end_of_phrase_silence_time: nil,
  split_transcript_at_phrase_end: nil,
  speech_detector_sensitivity: nil,
- background_audio_suppression: nil
+ background_audio_suppression: nil,
+ low_latency: nil
  )
  raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
  raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
@@ -667,7 +754,8 @@ module IBMWatson
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
  "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
  "speech_detector_sensitivity" => speech_detector_sensitivity,
- "background_audio_suppression" => background_audio_suppression
+ "background_audio_suppression" => background_audio_suppression,
+ "low_latency" => low_latency
  }
  options.delete_if { |_, v| v.nil? }
  WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
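For the WebSocket path, `low_latency` flows into the same `options` hash that `WebSocketClient` receives. A sketch, assuming the SDK's `RecognizeCallback` interface and a local audio file (both the callback body and the file name are illustrative):

    class MyCallback < IBMWatson::RecognizeCallback
      # Invoked with transcription payloads as they arrive.
      def on_transcription(transcription:)
        puts transcription
      end

      def on_error(error:)
        warn "Error received: #{error}"
      end
    end

    File.open("call-recording.wav") do |audio_file|
      ws = speech_to_text.recognize_using_websocket(
        audio: audio_file,
        recognize_callback: MyCallback.new,
        content_type: "audio/wav",
        model: "en-US_Telephony",
        low_latency: true          # forwarded through the options hash above
      )
      ws.start                     # opens the socket and streams the audio
    end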
@@ -785,7 +873,7 @@ module IBMWatson
  end

  ##
- # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
+ # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
  # Create a job.
  # Creates a job for a new asynchronous recognition request. The job is owned by the
  # instance of the service whose credentials are used to create it. How you learn the
@@ -881,14 +969,49 @@ module IBMWatson
  # sampling rate of the audio is lower than the minimum required rate, the request
  # fails.
  #
- # **See also:** [Audio
- # formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
- # @param audio [String] The audio to transcribe.
+ # **See also:** [Supported audio
+ # formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats).
+ #
+ #
+ # ### Next-generation models
+ #
+ # **Note:** The next-generation language models are beta functionality. They
+ # support a limited number of languages and features at this time. The supported
+ # languages, models, and features will increase with future releases.
+ #
+ # The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz)
+ # models for many languages. Next-generation models have higher throughput than the
+ # service's previous generation of `Broadband` and `Narrowband` models. When you use
+ # next-generation models, the service can return transcriptions more quickly and
+ # also provide noticeably better transcription accuracy.
+ #
+ # You specify a next-generation model by using the `model` query parameter, as you
+ # do a previous-generation model. Next-generation models support the same request
+ # headers as previous-generation models, but they support only the following
+ # additional query parameters:
+ # * `background_audio_suppression`
+ # * `inactivity_timeout`
+ # * `profanity_filter`
+ # * `redaction`
+ # * `smart_formatting`
+ # * `speaker_labels`
+ # * `speech_detector_sensitivity`
+ # * `timestamps`
+ #
+ # Many next-generation models also support the beta `low_latency` parameter, which
+ # is not available with previous-generation models.
+ #
+ # **See also:** [Next-generation languages and
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
+ # @param audio [File] The audio to transcribe.
  # @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
  # audio format, see **Audio formats (content types)** in the method description.
- # @param model [String] The identifier of the model that is to be used for the recognition request. See
- # [Languages and
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
+ # @param model [String] The identifier of the model that is to be used for the recognition request.
+ # (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
+ # `ar-MS_BroadbandModel` instead.) See [Languages and
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models) and
+ # [Next-generation languages and
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
  # @param callback_url [String] A URL to which callback notifications are to be sent. The URL must already be
  # successfully allowlisted by using the **Register a callback** method. You can
  # include the same callback URL with any number of job creation requests. Omit the
@@ -927,8 +1050,9 @@ module IBMWatson
  # recognition request. The base model of the specified custom language model must
  # match the model specified with the `model` parameter. You must make the request
  # with credentials for the instance of the service that owns the custom model. By
- # default, no custom language model is used. See [Custom
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
+ # default, no custom language model is used. See [Using a custom language model for
+ # speech
+ # recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse).
  #
  #
  # **Note:** Use this parameter instead of the deprecated `customization_id`
@@ -937,14 +1061,16 @@ module IBMWatson
  # recognition request. The base model of the specified custom acoustic model must
  # match the model specified with the `model` parameter. You must make the request
  # with credentials for the instance of the service that owns the custom model. By
- # default, no custom acoustic model is used. See [Custom
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
+ # default, no custom acoustic model is used. See [Using a custom acoustic model for
+ # speech
+ # recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acousticUse).
  # @param base_model_version [String] The version of the specified base model that is to be used with the recognition
  # request. Multiple versions of a base model can exist when a model is updated for
  # internal improvements. The parameter is intended primarily for use with custom
  # models that have been upgraded for a new base model. The default value depends on
- # whether the parameter is used with or without a custom model. See [Base model
- # version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
+ # whether the parameter is used with or without a custom model. See [Making speech
+ # recognition requests with upgraded custom
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade-use#custom-upgrade-use-recognition).
  # @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
  # recognition request, the customization weight tells the service how much weight to
  # give to words from the custom language model compared to those from the base model
@@ -961,8 +1087,8 @@ module IBMWatson
  # custom model's domain, but it can negatively affect performance on non-domain
  # phrases.
  #
- # See [Custom
- # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
+ # See [Using customization
+ # weight](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse#weight).
  # @param inactivity_timeout [Fixnum] The time in seconds after which, if only silence (no speech) is detected in
  # streaming audio, the connection is closed with a 400 error. The parameter is
  # useful for stopping audio submission from a live microphone when a user simply
@@ -979,34 +1105,34 @@ module IBMWatson
  # for double-byte languages might be shorter. Keywords are case-insensitive.
  #
  # See [Keyword
- # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
+ # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
  # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
  # considered to match a keyword if its confidence is greater than or equal to the
  # threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
  # you must also specify one or more keywords. The service performs no keyword
  # spotting if you omit either parameter. See [Keyword
- # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
+ # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
  # @param max_alternatives [Fixnum] The maximum number of alternative transcripts that the service is to return. By
  # default, the service returns a single transcript. If you specify a value of `0`,
  # the service uses the default value, `1`. See [Maximum
- # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#max_alternatives).
+ # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#max-alternatives).
  # @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
  # possible word alternative (also known as "Confusion Networks"). An alternative
  # word is considered if its confidence is greater than or equal to the threshold.
  # Specify a probability between 0.0 and 1.0. By default, the service computes no
  # alternative words. See [Word
- # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_alternatives).
+ # alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#word-alternatives).
  # @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
  # each word. By default, the service returns no word confidence scores. See [Word
- # confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_confidence).
+ # confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-confidence).
  # @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
  # timestamps are returned. See [Word
- # timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_timestamps).
+ # timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-timestamps).
  # @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
  # results by replacing inappropriate words with a series of asterisks. Set the
  # parameter to `false` to return results with no censoring. Applies to US English
- # transcription only. See [Profanity
- # filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#profanity_filter).
+ # and Japanese transcription only. See [Profanity
+ # filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#profanity-filtering).
  # @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
  # numbers, currency values, and internet addresses into more readable, conventional
  # representations in the final transcript of a recognition request. For US English,
@@ -1016,19 +1142,21 @@ module IBMWatson
  # **Note:** Applies to US English, Japanese, and Spanish transcription only.
  #
  # See [Smart
- # formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#smart_formatting).
+ # formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#smart-formatting).
  # @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
  # which participants in a multi-person exchange. By default, the service returns no
  # speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
  # parameter to be `true`, regardless of whether you specify `false` for the
  # parameter.
- #
- # **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
- # Spanish (both broadband and narrowband models) and UK English (narrowband model)
- # transcription only.
- #
- # See [Speaker
- # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
+ # * For previous-generation models, can be used for US English, Australian English,
+ # German, Japanese, Korean, and Spanish (both broadband and narrowband models) and
+ # UK English (narrowband model) transcription only.
+ # * For next-generation models, can be used for English (Australian, UK, and US),
+ # German, and Spanish transcription only.
+ #
+ # Restrictions and limitations apply to the use of speaker labels for both types of
+ # models. See [Speaker
+ # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
  # @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
  # customization ID (GUID) of a custom language model that is to be used with the
  # recognition request. Do not specify both parameters with a request.
@@ -1037,7 +1165,8 @@ module IBMWatson
  # specify the name of the custom language model for which the grammar is defined.
  # The service recognizes only strings that are recognized by the specified grammar;
  # it does not recognize other custom words from the model's words resource. See
- # [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
+ # [Using a grammar for speech
+ # recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUse).
  # @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
  # feature redacts any number that has three or more consecutive digits by replacing
  # each digit with an `X` character. It is intended to redact sensitive numeric data,
@@ -1052,7 +1181,7 @@ module IBMWatson
  # **Note:** Applies to US English, Japanese, and Korean transcription only.
  #
  # See [Numeric
- # redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
+ # redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#numeric-redaction).
  # @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
  # input audio. The service returns processing metrics at the interval specified by
  # the `processing_metrics_interval` parameter. It also returns processing metrics
@@ -1060,7 +1189,7 @@ module IBMWatson
  # the service returns no processing metrics.
  #
  # See [Processing
- # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
+ # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing-metrics).
  # @param processing_metrics_interval [Float] Specifies the interval in real wall-clock seconds at which the service is to
  # return processing metrics. The parameter is ignored unless the
  # `processing_metrics` parameter is set to `true`.
@@ -1074,13 +1203,13 @@ module IBMWatson
  # the service returns processing metrics only for transcription events.
  #
  # See [Processing
- # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
+ # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing-metrics).
  # @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
  # input audio. The service returns audio metrics with the final transcription
  # results. By default, the service returns no audio metrics.
  #
  # See [Audio
- # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
+ # metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio-metrics).
  # @param end_of_phrase_silence_time [Float] If `true`, specifies the duration of the pause interval at which the service
  # splits a transcript into multiple final results. If the service detects pauses or
  # extended silence before it reaches the end of the audio stream, its response can
@@ -1097,7 +1226,7 @@ module IBMWatson
  # Chinese is 0.6 seconds.
  #
  # See [End of phrase silence
- # time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
+ # time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#silence-time).
  # @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
  # based on semantic features of the input, for example, at the conclusion of
  # meaningful phrases such as sentences. The service bases its understanding of
@@ -1107,7 +1236,7 @@ module IBMWatson
  # interval.
  #
  # See [Split transcript at phrase
- # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+ # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#split-transcript).
  # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
  # the parameter to suppress word insertions from music, coughing, and other
  # non-speech events. The service biases the audio it passes for speech recognition
@@ -1119,8 +1248,8 @@ module IBMWatson
  # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
  # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
  #
- # The values increase on a monotonic curve. See [Speech Activity
- # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+ # The values increase on a monotonic curve. See [Speech detector
+ # sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity).
  # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
  # to prevent it from being transcribed as speech. Use the parameter to suppress side
  # conversations or background noise.
@@ -1131,10 +1260,27 @@ module IBMWatson
  # * 0.5 provides a reasonable level of audio suppression for general usage.
  # * 1.0 suppresses all audio (no audio is transcribed).
  #
- # The values increase on a monotonic curve. See [Speech Activity
- # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+ # The values increase on a monotonic curve. See [Background audio
+ # suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
+ # @param low_latency [Boolean] If `true` for next-generation `Multimedia` and `Telephony` models that support low
+ # latency, directs the service to produce results even more quickly than it usually
+ # does. Next-generation models produce transcription results faster than
+ # previous-generation models. The `low_latency` parameter causes the models to
+ # produce results even more quickly, though the results might be less accurate when
+ # the parameter is used.
+ #
+ # **Note:** The parameter is beta functionality. It is not available for
+ # previous-generation `Broadband` and `Narrowband` models. It is available only for
+ # some next-generation models.
+ #
+ # * For a list of next-generation models that support low latency, see [Supported
+ # language
+ # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
+ # for next-generation models.
+ # * For more information about the `low_latency` parameter, see [Low
+ # latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
- def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
+ def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
  raise ArgumentError.new("audio must be provided") if audio.nil?

  headers = {
@@ -1173,7 +1319,8 @@ module IBMWatson
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
  "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
  "speech_detector_sensitivity" => speech_detector_sensitivity,
- "background_audio_suppression" => background_audio_suppression
+ "background_audio_suppression" => background_audio_suppression,
+ "low_latency" => low_latency
  }

  data = audio
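The new `low_latency` flag slots into an ordinary asynchronous-recognition call. A minimal sketch follows; the IAM API key, the `en-US_Telephony` model choice, and the file name are illustrative assumptions, not values taken from this diff.

```ruby
require "ibm_watson"
require "ibm_cloud_sdk_core"

# Hypothetical credentials, for illustration only.
authenticator = IBMCloudSdkCore::IamAuthenticator.new(apikey: "{apikey}")
speech_to_text = IBMWatson::SpeechToTextV1.new(authenticator: authenticator)

File.open("audio-file.flac") do |audio_file|
  response = speech_to_text.create_job(
    audio: audio_file,
    content_type: "audio/flac",
    model: "en-US_Telephony",          # a next-generation model
    low_latency: true,                 # beta; may trade accuracy for speed
    background_audio_suppression: 0.5
  )
  puts response.result["id"]           # job ID to poll later with check_job
end
```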
@@ -1391,9 +1538,12 @@ module IBMWatson
  # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
  # @param language [String] The identifier of the language for which custom language or custom acoustic models
  # are to be returned. Omit the parameter to see all custom language or custom
- # acoustic models that are owned by the requesting credentials. **Note:** The
- # `ar-AR` (Modern Standard Arabic) and `zh-CN` (Mandarin Chinese) languages are not
- # available for language model customization.
+ # acoustic models that are owned by the requesting credentials. (**Note:** The
+ # identifier `ar-AR` is deprecated; use `ar-MS` instead.)
+ #
+ # To determine the languages for which customization is available, see [Language
+ # support for
+ # customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
  def list_language_models(language: nil)
  headers = {
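As a rough usage sketch (reusing the `speech_to_text` client from the earlier example), the replacement identifier is passed straight through the `language` keyword:

```ruby
# List only Modern Standard Arabic custom models; `ar-MS` replaces the
# deprecated `ar-AR` identifier noted in the diff above.
response = speech_to_text.list_language_models(language: "ar-MS")
response.result["customizations"].each { |model| puts model["name"] }
```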
@@ -1544,6 +1694,9 @@ module IBMWatson
  # The value that you assign is used for all recognition requests that use the model.
  # You can override it for any recognition request by specifying a customization
  # weight for that request.
+ #
+ # See [Using customization
+ # weight](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse#weight).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
  def train_language_model(customization_id:, word_type_to_add: nil, customization_weight: nil)
  raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
@@ -1625,7 +1778,7 @@ module IBMWatson
  # subsequent requests for the model until the upgrade completes.
  #
  # **See also:** [Upgrading a custom language
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeLanguage).
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade#custom-upgrade-language).
  # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
  # the request. You must make the request with credentials for the instance of the
  # service that owns the custom model.
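Initiating the upgrade itself is a one-line call; a sketch, again with a placeholder GUID:

```ruby
# Start the asynchronous upgrade; poll the model's status afterward to
# detect when the upgrade completes.
speech_to_text.upgrade_language_model(customization_id: "{customization_id}")
```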
@@ -2464,7 +2617,8 @@ module IBMWatson
  # custom model`.
  # @param base_model_name [String] The name of the base language model that is to be customized by the new custom
  # acoustic model. The new custom model can be used only with the base model that it
- # customizes.
+ # customizes. (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
+ # `ar-MS_BroadbandModel` instead.)
  #
  # To determine whether a base model supports acoustic model customization, refer to
  # [Language support for
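A sketch of creating an acoustic model against the renamed Arabic base model; the model name and description are illustrative:

```ruby
# Create a custom acoustic model on the Arabic broadband base model, using
# `ar-MS_BroadbandModel` rather than the deprecated `ar-AR_BroadbandModel`.
response = speech_to_text.create_acoustic_model(
  name: "Example Arabic acoustic model",
  base_model_name: "ar-MS_BroadbandModel"
)
puts response.result["customization_id"]
```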
@@ -2513,9 +2667,12 @@ module IBMWatson
  # models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
  # @param language [String] The identifier of the language for which custom language or custom acoustic models
  # are to be returned. Omit the parameter to see all custom language or custom
- # acoustic models that are owned by the requesting credentials. **Note:** The
- # `ar-AR` (Modern Standard Arabic) and `zh-CN` (Mandarin Chinese) languages are not
- # available for language model customization.
+ # acoustic models that are owned by the requesting credentials. (**Note:** The
+ # identifier `ar-AR` is deprecated; use `ar-MS` instead.)
+ #
+ # To determine the languages for which customization is available, see [Language
+ # support for
+ # customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
  def list_acoustic_models(language: nil)
  headers = {
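The identifier change applies identically on the acoustic side, for instance:

```ruby
# `ar-MS` filters custom acoustic models exactly as it filters language models.
speech_to_text.list_acoustic_models(language: "ar-MS")
```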
@@ -2613,14 +2770,14 @@ module IBMWatson
  # it. You must use credentials for the instance of the service that owns a model to
  # train it.
  #
- # The training method is asynchronous. It can take on the order of minutes or hours
- # to complete depending on the total amount of audio data on which the custom
- # acoustic model is being trained and the current load on the service. Typically,
- # training a custom acoustic model takes approximately two to four times the length
- # of its audio data. The actual time depends on the model being trained and the
- # nature of the audio, such as whether the audio is clean or noisy. The method
- # returns an HTTP 200 response code to indicate that the training process has begun.
- #
+ # The training method is asynchronous. Training time depends on the cumulative
+ # amount of audio data that the custom acoustic model contains and the current load
+ # on the service. When you train or retrain a model, the service uses all of the
+ # model's audio data in the training. Training a custom acoustic model takes
+ # approximately as long as the length of its cumulative audio data. For example, it
+ # takes approximately 2 hours to train a model that contains a total of 2 hours of
+ # audio. The method returns an HTTP 200 response code to indicate that the training
+ # process has begun.
  #
  # You can monitor the status of the training by using the **Get a custom acoustic
  # model** method to poll the model's status. Use a loop to check the status once a
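The polling loop that this doc comment describes might look like the following sketch, with a placeholder GUID and a one-minute interval as suggested above:

```ruby
# Start training, then poll once a minute until the model is usable.
speech_to_text.train_acoustic_model(customization_id: "{customization_id}")

loop do
  model = speech_to_text.get_acoustic_model(
    customization_id: "{customization_id}"
  ).result
  case model["status"]
  when "available" then break                          # training finished
  when "failed"    then raise "acoustic model training failed"
  else sleep 60                                        # still pending/training
  end
end
```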
@@ -2765,7 +2922,7 @@ module IBMWatson
  # acoustic model was not trained with a custom language model.
  #
  # **See also:** [Upgrading a custom acoustic
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeAcoustic).
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade#custom-upgrade-acoustic).
  # @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
  # the request. You must make the request with credentials for the instance of the
  # service that owns the custom model.
@@ -2779,7 +2936,7 @@ module IBMWatson
  # upgrade of a custom acoustic model that is trained with a custom language model,
  # and only if you receive a 400 response code and the message `No input data
  # modified since last training`. See [Upgrading a custom acoustic
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeAcoustic).
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade#custom-upgrade-acoustic).
  # @return [nil]
  def upgrade_acoustic_model(customization_id:, custom_language_model_id: nil, force: nil)
  raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
@@ -2917,8 +3074,8 @@ module IBMWatson
  # If the sampling rate of the audio is lower than the minimum required rate, the
  # service labels the audio file as `invalid`.
  #
- # **See also:** [Audio
- # formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
+ # **See also:** [Supported audio
+ # formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats).
  #
  #
  # ### Content types for archive-type resources
@@ -2964,7 +3121,7 @@ module IBMWatson
  # used, their use is strongly discouraged.)
  # * Do not use the name of an audio resource that has already been added to the
  # custom model.
- # @param audio_resource [String] The audio resource that is to be added to the custom acoustic model, an individual
+ # @param audio_resource [File] The audio resource that is to be added to the custom acoustic model, an individual
  # audio file or an archive file.
  #
  # With the `curl` command, use the `--data-binary` option to upload the file for the
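From Ruby, the retyped `audio_resource` parameter simply takes an open file handle. A minimal sketch, with placeholder names and an assumed WAV content type:

```ruby
# Pass the audio resource as an open File, matching the parameter's new
# [File] type shown in the diff above.
File.open("audio1.wav") do |audio_file|
  speech_to_text.add_audio(
    customization_id: "{customization_id}",  # placeholder GUID
    audio_name: "audio1",
    audio_resource: audio_file,
    content_type: "audio/wav"
  )
end
```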