ibm_watson 1.6.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +42 -4
- data/lib/ibm_watson/assistant_v1.rb +277 -81
- data/lib/ibm_watson/assistant_v2.rb +100 -22
- data/lib/ibm_watson/compare_comply_v1.rb +44 -23
- data/lib/ibm_watson/discovery_v1.rb +132 -14
- data/lib/ibm_watson/discovery_v2.rb +234 -18
- data/lib/ibm_watson/language_translator_v3.rb +59 -27
- data/lib/ibm_watson/natural_language_classifier_v1.rb +3 -2
- data/lib/ibm_watson/natural_language_understanding_v1.rb +705 -14
- data/lib/ibm_watson/personality_insights_v3.rb +29 -18
- data/lib/ibm_watson/speech_to_text_v1.rb +278 -121
- data/lib/ibm_watson/text_to_speech_v1.rb +689 -130
- data/lib/ibm_watson/tone_analyzer_v3.rb +11 -13
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +32 -16
- data/lib/ibm_watson/visual_recognition_v4.rb +67 -23
- data/test/integration/test_assistant_v1.rb +9 -0
- data/test/integration/test_assistant_v2.rb +9 -0
- data/test/integration/test_discovery_v2.rb +29 -0
- data/test/integration/test_natural_language_understanding_v1.rb +134 -1
- data/test/integration/test_text_to_speech_v1.rb +60 -3
- data/test/unit/test_assistant_v1.rb +52 -1
- data/test/unit/test_assistant_v2.rb +51 -0
- data/test/unit/test_discovery_v2.rb +30 -1
- data/test/unit/test_natural_language_understanding_v1.rb +231 -0
- data/test/unit/test_text_to_speech_v1.rb +152 -7
- metadata +12 -11
@@ -13,12 +13,28 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
|
18
|
+
#
|
19
|
+
# IBM Watson™ Personality Insights is discontinued. Existing instances are
|
20
|
+
# supported until 1 December 2021, but as of 1 December 2020, you cannot create new
|
21
|
+
# instances. Any instance that exists on 1 December 2021 will be deleted.<br/><br/>No
|
22
|
+
# direct replacement exists for Personality Insights. However, you can consider using [IBM
|
23
|
+
# Watson™ Natural Language
|
24
|
+
# Understanding](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-about)
|
25
|
+
# on IBM Cloud® as part of a replacement analytic workflow for your Personality
|
26
|
+
# Insights use cases. You can use Natural Language Understanding to extract data and
|
27
|
+
# insights from text, such as keywords, categories, sentiment, emotion, and syntax. For
|
28
|
+
# more information about the personality models in Personality Insights, see [The science
|
29
|
+
# behind the
|
30
|
+
# service](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-science).
|
31
|
+
# {: deprecated}
|
32
|
+
#
|
33
|
+
# The IBM Watson Personality Insights service enables applications to derive insights from
|
34
|
+
# social media, enterprise data, or other digital communications. The service uses
|
35
|
+
# linguistic analytics to infer individuals' intrinsic personality characteristics,
|
36
|
+
# including Big Five, Needs, and Values, from digital communications such as email, text
|
37
|
+
# messages, tweets, and forum posts.
|
22
38
|
#
|
23
39
|
# The service can automatically infer, from potentially noisy social media, portraits of
|
24
40
|
# individuals that reflect their personality characteristics. The service can infer
|
@@ -41,7 +57,6 @@ require "json"
|
|
41
57
|
require "ibm_cloud_sdk_core"
|
42
58
|
require_relative "./common.rb"
|
43
59
|
|
44
|
-
# Module for the Watson APIs
|
45
60
|
module IBMWatson
|
46
61
|
##
|
47
62
|
# The Personality Insights V3 service.
|
@@ -49,33 +64,27 @@ module IBMWatson
|
|
49
64
|
include Concurrent::Async
|
50
65
|
DEFAULT_SERVICE_NAME = "personality_insights"
|
51
66
|
DEFAULT_SERVICE_URL = "https://api.us-south.personality-insights.watson.cloud.ibm.com"
|
67
|
+
attr_accessor :version
|
52
68
|
##
|
53
69
|
# @!method initialize(args)
|
54
70
|
# Construct a new client for the Personality Insights service.
|
55
71
|
#
|
56
72
|
# @param args [Hash] The args to initialize with
|
57
|
-
# @option args version [String]
|
58
|
-
#
|
59
|
-
# incompatible way, a new minor version of the API is released.
|
60
|
-
# The service uses the API version for the date you specify, or
|
61
|
-
# the most recent version before that date. Note that you should
|
62
|
-
# not programmatically specify the current date at runtime, in
|
63
|
-
# case the API has been updated since your application's release.
|
64
|
-
# Instead, specify a version date that is compatible with your
|
65
|
-
# application, and don't change it until your application is
|
66
|
-
# ready for a later version.
|
73
|
+
# @option args version [String] Release date of the version of the API you want to use. Specify dates in
|
74
|
+
# YYYY-MM-DD format. The current version is `2017-10-13`.
|
67
75
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
68
76
|
# The base service_url may differ between IBM Cloud regions.
|
69
77
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
70
78
|
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
71
79
|
# any external configuration, if applicable.
|
72
80
|
def initialize(args = {})
|
81
|
+
warn "On 1 December 2021, Personality Insights will no longer be available. For more information, see https://github.com/watson-developer-cloud/ruby-sdk/tree/master#personality-insights-deprecation."
|
73
82
|
@__async_initialized__ = false
|
74
83
|
defaults = {}
|
75
|
-
defaults[:version] = nil
|
76
84
|
defaults[:service_url] = DEFAULT_SERVICE_URL
|
77
85
|
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
78
86
|
defaults[:authenticator] = nil
|
87
|
+
defaults[:version] = nil
|
79
88
|
user_service_url = args[:service_url] unless args[:service_url].nil?
|
80
89
|
args = defaults.merge(args)
|
81
90
|
@version = args[:version]
|
@@ -170,6 +179,8 @@ module IBMWatson
|
|
170
179
|
# default, no consumption preferences are returned.
|
171
180
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
172
181
|
def profile(content:, accept:, content_type: nil, content_language: nil, accept_language: nil, raw_scores: nil, csv_headers: nil, consumption_preferences: nil)
|
182
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
183
|
+
|
173
184
|
raise ArgumentError.new("content must be provided") if content.nil?
|
174
185
|
|
175
186
|
raise ArgumentError.new("accept must be provided") if accept.nil?
|
@@ -13,13 +13,22 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.31.0-902c9336-20210504-161156
|
18
|
+
#
|
17
19
|
# The IBM Watson™ Speech to Text service provides APIs that use IBM's
|
18
20
|
# speech-recognition capabilities to produce transcripts of spoken audio. The service can
|
19
21
|
# transcribe speech from various languages and audio formats. In addition to basic
|
20
22
|
# transcription, the service can produce detailed information about many different aspects
|
21
|
-
# of the audio.
|
22
|
-
#
|
23
|
+
# of the audio. It returns all JSON response content in the UTF-8 character set.
|
24
|
+
#
|
25
|
+
# The service supports two types of models: previous-generation models that include the
|
26
|
+
# terms `Broadband` and `Narrowband` in their names, and beta next-generation models that
|
27
|
+
# include the terms `Multimedia` and `Telephony` in their names. Broadband and multimedia
|
28
|
+
# models have minimum sampling rates of 16 kHz. Narrowband and telephony models have
|
29
|
+
# minimum sampling rates of 8 kHz. The beta next-generation models currently support fewer
|
30
|
+
# languages and features, but they offer high throughput and greater transcription
|
31
|
+
# accuracy.
|
23
32
|
#
|
24
33
|
# For speech recognition, the service supports synchronous and asynchronous HTTP
|
25
34
|
# Representational State Transfer (REST) interfaces. It also supports a WebSocket
|
@@ -35,8 +44,9 @@
|
|
35
44
|
# can recognize.
|
36
45
|
#
|
37
46
|
# Language model customization and acoustic model customization are generally available
|
38
|
-
# for production use with all
|
39
|
-
# beta functionality for all
|
47
|
+
# for production use with all previous-generation models that are generally available.
|
48
|
+
# Grammars are beta functionality for all previous-generation models that support language
|
49
|
+
# model customization. Next-generation models do not support customization at this time.
|
40
50
|
|
41
51
|
require "concurrent"
|
42
52
|
require "erb"
|
@@ -44,7 +54,6 @@ require "json"
|
|
44
54
|
require "ibm_cloud_sdk_core"
|
45
55
|
require_relative "./common.rb"
|
46
56
|
|
47
|
-
# Module for the Watson APIs
|
48
57
|
module IBMWatson
|
49
58
|
##
|
50
59
|
# The Speech to Text V1 service.
|
@@ -87,8 +96,8 @@ module IBMWatson
|
|
87
96
|
# among other things. The ordering of the list of models can change from call to
|
88
97
|
# call; do not rely on an alphabetized or static list of models.
|
89
98
|
#
|
90
|
-
# **See also:** [
|
91
|
-
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models
|
99
|
+
# **See also:** [Listing
|
100
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-list).
|
92
101
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
93
102
|
def list_models
|
94
103
|
headers = {
|
@@ -114,10 +123,11 @@ module IBMWatson
|
|
114
123
|
# with the service. The information includes the name of the model and its minimum
|
115
124
|
# sampling rate in Hertz, among other things.
|
116
125
|
#
|
117
|
-
# **See also:** [
|
118
|
-
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models
|
126
|
+
# **See also:** [Listing
|
127
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-list).
|
119
128
|
# @param model_id [String] The identifier of the model in the form of its name from the output of the **Get a
|
120
|
-
# model** method.
|
129
|
+
# model** method. (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
|
130
|
+
# `ar-MS_BroadbandModel` instead.)
|
121
131
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
122
132
|
def get_model(model_id:)
|
123
133
|
raise ArgumentError.new("model_id must be provided") if model_id.nil?
|
@@ -142,7 +152,7 @@ module IBMWatson
|
|
142
152
|
#########################
|
143
153
|
|
144
154
|
##
|
145
|
-
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
155
|
+
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
|
146
156
|
# Recognize audio.
|
147
157
|
# Sends audio and returns transcription results for a recognition request. You can
|
148
158
|
# pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
|
@@ -209,8 +219,40 @@ module IBMWatson
|
|
209
219
|
# sampling rate of the audio is lower than the minimum required rate, the request
|
210
220
|
# fails.
|
211
221
|
#
|
212
|
-
# **See also:** [
|
213
|
-
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats
|
222
|
+
# **See also:** [Supported audio
|
223
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats).
|
224
|
+
#
|
225
|
+
#
|
226
|
+
# ### Next-generation models
|
227
|
+
#
|
228
|
+
# **Note:** The next-generation language models are beta functionality. They
|
229
|
+
# support a limited number of languages and features at this time. The supported
|
230
|
+
# languages, models, and features will increase with future releases.
|
231
|
+
#
|
232
|
+
# The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz)
|
233
|
+
# models for many languages. Next-generation models have higher throughput than the
|
234
|
+
# service's previous generation of `Broadband` and `Narrowband` models. When you use
|
235
|
+
# next-generation models, the service can return transcriptions more quickly and
|
236
|
+
# also provide noticeably better transcription accuracy.
|
237
|
+
#
|
238
|
+
# You specify a next-generation model by using the `model` query parameter, as you
|
239
|
+
# do a previous-generation model. Next-generation models support the same request
|
240
|
+
# headers as previous-generation models, but they support only the following
|
241
|
+
# additional query parameters:
|
242
|
+
# * `background_audio_suppression`
|
243
|
+
# * `inactivity_timeout`
|
244
|
+
# * `profanity_filter`
|
245
|
+
# * `redaction`
|
246
|
+
# * `smart_formatting`
|
247
|
+
# * `speaker_labels`
|
248
|
+
# * `speech_detector_sensitivity`
|
249
|
+
# * `timestamps`
|
250
|
+
#
|
251
|
+
# Many next-generation models also support the beta `low_latency` parameter, which
|
252
|
+
# is not available with previous-generation models.
|
253
|
+
#
|
254
|
+
# **See also:** [Next-generation languages and
|
255
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
|
214
256
|
#
|
215
257
|
#
|
216
258
|
# ### Multipart speech recognition
|
@@ -230,18 +272,22 @@ module IBMWatson
|
|
230
272
|
#
|
231
273
|
# **See also:** [Making a multipart HTTP
|
232
274
|
# request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi).
|
233
|
-
# @param audio [
|
275
|
+
# @param audio [File] The audio to transcribe.
|
234
276
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
235
277
|
# audio format, see **Audio formats (content types)** in the method description.
|
236
|
-
# @param model [String] The identifier of the model that is to be used for the recognition request.
|
237
|
-
#
|
238
|
-
#
|
278
|
+
# @param model [String] The identifier of the model that is to be used for the recognition request.
|
279
|
+
# (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
|
280
|
+
# `ar-MS_BroadbandModel` instead.) See [Languages and
|
281
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models) and
|
282
|
+
# [Next-generation languages and
|
283
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
|
239
284
|
# @param language_customization_id [String] The customization ID (GUID) of a custom language model that is to be used with the
|
240
285
|
# recognition request. The base model of the specified custom language model must
|
241
286
|
# match the model specified with the `model` parameter. You must make the request
|
242
287
|
# with credentials for the instance of the service that owns the custom model. By
|
243
|
-
# default, no custom language model is used. See [
|
244
|
-
#
|
288
|
+
# default, no custom language model is used. See [Using a custom language model for
|
289
|
+
# speech
|
290
|
+
# recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse).
|
245
291
|
#
|
246
292
|
#
|
247
293
|
# **Note:** Use this parameter instead of the deprecated `customization_id`
|
@@ -250,14 +296,16 @@ module IBMWatson
|
|
250
296
|
# recognition request. The base model of the specified custom acoustic model must
|
251
297
|
# match the model specified with the `model` parameter. You must make the request
|
252
298
|
# with credentials for the instance of the service that owns the custom model. By
|
253
|
-
# default, no custom acoustic model is used. See [
|
254
|
-
#
|
299
|
+
# default, no custom acoustic model is used. See [Using a custom acoustic model for
|
300
|
+
# speech
|
301
|
+
# recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acousticUse).
|
255
302
|
# @param base_model_version [String] The version of the specified base model that is to be used with the recognition
|
256
303
|
# request. Multiple versions of a base model can exist when a model is updated for
|
257
304
|
# internal improvements. The parameter is intended primarily for use with custom
|
258
305
|
# models that have been upgraded for a new base model. The default value depends on
|
259
|
-
# whether the parameter is used with or without a custom model. See [
|
260
|
-
#
|
306
|
+
# whether the parameter is used with or without a custom model. See [Making speech
|
307
|
+
# recognition requests with upgraded custom
|
308
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade-use#custom-upgrade-use-recognition).
|
261
309
|
# @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
|
262
310
|
# recognition request, the customization weight tells the service how much weight to
|
263
311
|
# give to words from the custom language model compared to those from the base model
|
@@ -274,8 +322,8 @@ module IBMWatson
|
|
274
322
|
# custom model's domain, but it can negatively affect performance on non-domain
|
275
323
|
# phrases.
|
276
324
|
#
|
277
|
-
# See [
|
278
|
-
#
|
325
|
+
# See [Using customization
|
326
|
+
# weight](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse#weight).
|
279
327
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in
|
280
328
|
# streaming audio, the connection is closed with a 400 error. The parameter is
|
281
329
|
# useful for stopping audio submission from a live microphone when a user simply
|
@@ -292,34 +340,34 @@ module IBMWatson
|
|
292
340
|
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
293
341
|
#
|
294
342
|
# See [Keyword
|
295
|
-
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
343
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
|
296
344
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
297
345
|
# considered to match a keyword if its confidence is greater than or equal to the
|
298
346
|
# threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
|
299
347
|
# you must also specify one or more keywords. The service performs no keyword
|
300
348
|
# spotting if you omit either parameter. See [Keyword
|
301
|
-
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
349
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
|
302
350
|
# @param max_alternatives [Integer] The maximum number of alternative transcripts that the service is to return. By
|
303
351
|
# default, the service returns a single transcript. If you specify a value of `0`,
|
304
352
|
# the service uses the default value, `1`. See [Maximum
|
305
|
-
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
353
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#max-alternatives).
|
306
354
|
# @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
|
307
355
|
# possible word alternative (also known as "Confusion Networks"). An alternative
|
308
356
|
# word is considered if its confidence is greater than or equal to the threshold.
|
309
357
|
# Specify a probability between 0.0 and 1.0. By default, the service computes no
|
310
358
|
# alternative words. See [Word
|
311
|
-
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
359
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#word-alternatives).
|
312
360
|
# @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
|
313
361
|
# each word. By default, the service returns no word confidence scores. See [Word
|
314
|
-
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
362
|
+
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-confidence).
|
315
363
|
# @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
|
316
364
|
# timestamps are returned. See [Word
|
317
|
-
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
365
|
+
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-timestamps).
|
318
366
|
# @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
|
319
367
|
# results by replacing inappropriate words with a series of asterisks. Set the
|
320
368
|
# parameter to `false` to return results with no censoring. Applies to US English
|
321
|
-
# transcription only. See [Profanity
|
322
|
-
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
369
|
+
# and Japanese transcription only. See [Profanity
|
370
|
+
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#profanity-filtering).
|
323
371
|
# @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
|
324
372
|
# numbers, currency values, and internet addresses into more readable, conventional
|
325
373
|
# representations in the final transcript of a recognition request. For US English,
|
@@ -329,19 +377,21 @@ module IBMWatson
|
|
329
377
|
# **Note:** Applies to US English, Japanese, and Spanish transcription only.
|
330
378
|
#
|
331
379
|
# See [Smart
|
332
|
-
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
380
|
+
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#smart-formatting).
|
333
381
|
# @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
|
334
382
|
# which participants in a multi-person exchange. By default, the service returns no
|
335
383
|
# speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
|
336
384
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
337
385
|
# parameter.
|
338
|
-
#
|
339
|
-
#
|
340
|
-
#
|
341
|
-
#
|
342
|
-
#
|
343
|
-
#
|
344
|
-
# labels
|
386
|
+
# * For previous-generation models, can be used for US English, Australian English,
|
387
|
+
# German, Japanese, Korean, and Spanish (both broadband and narrowband models) and
|
388
|
+
# UK English (narrowband model) transcription only.
|
389
|
+
# * For next-generation models, can be used for English (Australian, UK, and US),
|
390
|
+
# German, and Spanish transcription only.
|
391
|
+
#
|
392
|
+
# Restrictions and limitations apply to the use of speaker labels for both types of
|
393
|
+
# models. See [Speaker
|
394
|
+
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
|
345
395
|
# @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
|
346
396
|
# customization ID (GUID) of a custom language model that is to be used with the
|
347
397
|
# recognition request. Do not specify both parameters with a request.
|
@@ -350,7 +400,8 @@ module IBMWatson
|
|
350
400
|
# specify the name of the custom language model for which the grammar is defined.
|
351
401
|
# The service recognizes only strings that are recognized by the specified grammar;
|
352
402
|
# it does not recognize other custom words from the model's words resource. See
|
353
|
-
# [
|
403
|
+
# [Using a grammar for speech
|
404
|
+
# recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUse).
|
354
405
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
355
406
|
# feature redacts any number that has three or more consecutive digits by replacing
|
356
407
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -365,13 +416,13 @@ module IBMWatson
|
|
365
416
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
366
417
|
#
|
367
418
|
# See [Numeric
|
368
|
-
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
419
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#numeric-redaction).
|
369
420
|
# @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
|
370
421
|
# input audio. The service returns audio metrics with the final transcription
|
371
422
|
# results. By default, the service returns no audio metrics.
|
372
423
|
#
|
373
424
|
# See [Audio
|
374
|
-
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#
|
425
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio-metrics).
|
375
426
|
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
376
427
|
# splits a transcript into multiple final results. If the service detects pauses or
|
377
428
|
# extended silence before it reaches the end of the audio stream, its response can
|
@@ -388,7 +439,7 @@ module IBMWatson
|
|
388
439
|
# Chinese is 0.6 seconds.
|
389
440
|
#
|
390
441
|
# See [End of phrase silence
|
391
|
-
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
442
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#silence-time).
|
392
443
|
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
393
444
|
# based on semantic features of the input, for example, at the conclusion of
|
394
445
|
# meaningful phrases such as sentences. The service bases its understanding of
|
@@ -398,7 +449,7 @@ module IBMWatson
|
|
398
449
|
# interval.
|
399
450
|
#
|
400
451
|
# See [Split transcript at phrase
|
401
|
-
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
452
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#split-transcript).
|
402
453
|
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
403
454
|
# the parameter to suppress word insertions from music, coughing, and other
|
404
455
|
# non-speech events. The service biases the audio it passes for speech recognition
|
@@ -410,8 +461,8 @@ module IBMWatson
|
|
410
461
|
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
411
462
|
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
412
463
|
#
|
413
|
-
# The values increase on a monotonic curve. See [Speech
|
414
|
-
#
|
464
|
+
# The values increase on a monotonic curve. See [Speech detector
|
465
|
+
# sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity).
|
415
466
|
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
416
467
|
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
417
468
|
# conversations or background noise.
|
@@ -422,10 +473,27 @@ module IBMWatson
|
|
422
473
|
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
423
474
|
# * 1.0 suppresses all audio (no audio is transcribed).
|
424
475
|
#
|
425
|
-
# The values increase on a monotonic curve. See [
|
426
|
-
#
|
476
|
+
# The values increase on a monotonic curve. See [Background audio
|
477
|
+
# suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
|
478
|
+
# @param low_latency [Boolean] If `true` for next-generation `Multimedia` and `Telephony` models that support low
|
479
|
+
# latency, directs the service to produce results even more quickly than it usually
|
480
|
+
# does. Next-generation models produce transcription results faster than
|
481
|
+
# previous-generation models. The `low_latency` parameter causes the models to
|
482
|
+
# produce results even more quickly, though the results might be less accurate when
|
483
|
+
# the parameter is used.
|
484
|
+
#
|
485
|
+
# **Note:** The parameter is beta functionality. It is not available for
|
486
|
+
# previous-generation `Broadband` and `Narrowband` models. It is available only for
|
487
|
+
# some next-generation models.
|
488
|
+
#
|
489
|
+
# * For a list of next-generation models that support low latency, see [Supported
|
490
|
+
# language
|
491
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
|
492
|
+
# for next-generation models.
|
493
|
+
# * For more information about the `low_latency` parameter, see [Low
|
494
|
+
# latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
|
427
495
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
428
|
-
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
496
|
+
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
|
429
497
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
430
498
|
|
431
499
|
headers = {
|
@@ -458,7 +526,8 @@ module IBMWatson
|
|
458
526
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
459
527
|
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
460
528
|
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
461
|
-
"background_audio_suppression" => background_audio_suppression
|
529
|
+
"background_audio_suppression" => background_audio_suppression,
|
530
|
+
"low_latency" => low_latency
|
462
531
|
}
|
463
532
|
|
464
533
|
data = audio
|
@@ -477,7 +546,7 @@ module IBMWatson
|
|
477
546
|
end
|
478
547
|
|
479
548
|
##
|
480
|
-
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
549
|
+
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
|
481
550
|
# Sends audio for speech recognition using web sockets.
|
482
551
|
# @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
|
483
552
|
# @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
|
@@ -594,6 +663,23 @@ module IBMWatson
|
|
594
663
|
#
|
595
664
|
# The values increase on a monotonic curve. See [Speech Activity
|
596
665
|
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
666
|
+
# @param low_latency [Boolean] If `true` for next-generation `Multimedia` and `Telephony` models that support low
|
667
|
+
# latency, directs the service to produce results even more quickly than it usually
|
668
|
+
# does. Next-generation models produce transcription results faster than
|
669
|
+
# previous-generation models. The `low_latency` parameter causes the models to
|
670
|
+
# produce results even more quickly, though the results might be less accurate when
|
671
|
+
# the parameter is used.
|
672
|
+
#
|
673
|
+
# **Note:** The parameter is beta functionality. It is not available for
|
674
|
+
# previous-generation `Broadband` and `Narrowband` models. It is available only for
|
675
|
+
# some next-generation models.
|
676
|
+
#
|
677
|
+
# * For a list of next-generation models that support low latency, see [Supported
|
678
|
+
# language
|
679
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
|
680
|
+
# for next-generation models.
|
681
|
+
# * For more information about the `low_latency` parameter, see [Low
|
682
|
+
# latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
|
597
683
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
598
684
|
def recognize_using_websocket(
|
599
685
|
content_type: nil,
|
@@ -625,7 +711,8 @@ module IBMWatson
|
|
625
711
|
end_of_phrase_silence_time: nil,
|
626
712
|
split_transcript_at_phrase_end: nil,
|
627
713
|
speech_detector_sensitivity: nil,
|
628
|
-
background_audio_suppression: nil
|
714
|
+
background_audio_suppression: nil,
|
715
|
+
low_latency: nil
|
629
716
|
)
|
630
717
|
raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
|
631
718
|
raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
|
@@ -667,7 +754,8 @@ module IBMWatson
|
|
667
754
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
668
755
|
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
669
756
|
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
670
|
-
"background_audio_suppression" => background_audio_suppression
|
757
|
+
"background_audio_suppression" => background_audio_suppression,
|
758
|
+
"low_latency" => low_latency
|
671
759
|
}
|
672
760
|
options.delete_if { |_, v| v.nil? }
|
673
761
|
WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
|
@@ -785,7 +873,7 @@ module IBMWatson
|
|
785
873
|
end
|
786
874
|
|
787
875
|
##
|
788
|
-
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
876
|
+
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
|
789
877
|
# Create a job.
|
790
878
|
# Creates a job for a new asynchronous recognition request. The job is owned by the
|
791
879
|
# instance of the service whose credentials are used to create it. How you learn the
|
@@ -881,14 +969,49 @@ module IBMWatson
|
|
881
969
|
# sampling rate of the audio is lower than the minimum required rate, the request
|
882
970
|
# fails.
|
883
971
|
#
|
884
|
-
# **See also:** [
|
885
|
-
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats
|
886
|
-
#
|
972
|
+
# **See also:** [Supported audio
|
973
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats).
|
974
|
+
#
|
975
|
+
#
|
976
|
+
# ### Next-generation models
|
977
|
+
#
|
978
|
+
# **Note:** The next-generation language models are beta functionality. They
|
979
|
+
# support a limited number of languages and features at this time. The supported
|
980
|
+
# languages, models, and features will increase with future releases.
|
981
|
+
#
|
982
|
+
# The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz)
|
983
|
+
# models for many languages. Next-generation models have higher throughput than the
|
984
|
+
# service's previous generation of `Broadband` and `Narrowband` models. When you use
|
985
|
+
# next-generation models, the service can return transcriptions more quickly and
|
986
|
+
# also provide noticeably better transcription accuracy.
|
987
|
+
#
|
988
|
+
# You specify a next-generation model by using the `model` query parameter, as you
|
989
|
+
# do a previous-generation model. Next-generation models support the same request
|
990
|
+
# headers as previous-generation models, but they support only the following
|
991
|
+
# additional query parameters:
|
992
|
+
# * `background_audio_suppression`
|
993
|
+
# * `inactivity_timeout`
|
994
|
+
# * `profanity_filter`
|
995
|
+
# * `redaction`
|
996
|
+
# * `smart_formatting`
|
997
|
+
# * `speaker_labels`
|
998
|
+
# * `speech_detector_sensitivity`
|
999
|
+
# * `timestamps`
|
1000
|
+
#
|
1001
|
+
# Many next-generation models also support the beta `low_latency` parameter, which
|
1002
|
+
# is not available with previous-generation models.
|
1003
|
+
#
|
1004
|
+
# **See also:** [Next-generation languages and
|
1005
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
|
1006
|
+
# @param audio [File] The audio to transcribe.
|
887
1007
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
888
1008
|
# audio format, see **Audio formats (content types)** in the method description.
|
889
|
-
# @param model [String] The identifier of the model that is to be used for the recognition request.
|
890
|
-
#
|
891
|
-
#
|
1009
|
+
# @param model [String] The identifier of the model that is to be used for the recognition request.
|
1010
|
+
# (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
|
1011
|
+
# `ar-MS_BroadbandModel` instead.) See [Languages and
|
1012
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models) and
|
1013
|
+
# [Next-generation languages and
|
1014
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
|
892
1015
|
# @param callback_url [String] A URL to which callback notifications are to be sent. The URL must already be
|
893
1016
|
# successfully allowlisted by using the **Register a callback** method. You can
|
894
1017
|
# include the same callback URL with any number of job creation requests. Omit the
|
@@ -927,8 +1050,9 @@ module IBMWatson
|
|
927
1050
|
# recognition request. The base model of the specified custom language model must
|
928
1051
|
# match the model specified with the `model` parameter. You must make the request
|
929
1052
|
# with credentials for the instance of the service that owns the custom model. By
|
930
|
-
# default, no custom language model is used. See [
|
931
|
-
#
|
1053
|
+
# default, no custom language model is used. See [Using a custom language model for
|
1054
|
+
# speech
|
1055
|
+
# recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse).
|
932
1056
|
#
|
933
1057
|
#
|
934
1058
|
# **Note:** Use this parameter instead of the deprecated `customization_id`
|
@@ -937,14 +1061,16 @@ module IBMWatson
|
|
937
1061
|
# recognition request. The base model of the specified custom acoustic model must
|
938
1062
|
# match the model specified with the `model` parameter. You must make the request
|
939
1063
|
# with credentials for the instance of the service that owns the custom model. By
|
940
|
-
# default, no custom acoustic model is used. See [
|
941
|
-
#
|
1064
|
+
# default, no custom acoustic model is used. See [Using a custom acoustic model for
|
1065
|
+
# speech
|
1066
|
+
# recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acousticUse).
|
942
1067
|
# @param base_model_version [String] The version of the specified base model that is to be used with the recognition
|
943
1068
|
# request. Multiple versions of a base model can exist when a model is updated for
|
944
1069
|
# internal improvements. The parameter is intended primarily for use with custom
|
945
1070
|
# models that have been upgraded for a new base model. The default value depends on
|
946
|
-
# whether the parameter is used with or without a custom model. See [
|
947
|
-
#
|
1071
|
+
# whether the parameter is used with or without a custom model. See [Making speech
|
1072
|
+
# recognition requests with upgraded custom
|
1073
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade-use#custom-upgrade-use-recognition).
|
948
1074
|
# @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
|
949
1075
|
# recognition request, the customization weight tells the service how much weight to
|
950
1076
|
# give to words from the custom language model compared to those from the base model
|
@@ -961,8 +1087,8 @@ module IBMWatson
|
|
961
1087
|
# custom model's domain, but it can negatively affect performance on non-domain
|
962
1088
|
# phrases.
|
963
1089
|
#
|
964
|
-
# See [
|
965
|
-
#
|
1090
|
+
# See [Using customization
|
1091
|
+
# weight](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse#weight).
|
966
1092
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in
|
967
1093
|
# streaming audio, the connection is closed with a 400 error. The parameter is
|
968
1094
|
# useful for stopping audio submission from a live microphone when a user simply
|
@@ -979,34 +1105,34 @@ module IBMWatson
|
|
979
1105
|
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
980
1106
|
#
|
981
1107
|
# See [Keyword
|
982
|
-
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1108
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
|
983
1109
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
984
1110
|
# considered to match a keyword if its confidence is greater than or equal to the
|
985
1111
|
# threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
|
986
1112
|
# you must also specify one or more keywords. The service performs no keyword
|
987
1113
|
# spotting if you omit either parameter. See [Keyword
|
988
|
-
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1114
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#keyword-spotting).
|
989
1115
|
# @param max_alternatives [Integer] The maximum number of alternative transcripts that the service is to return. By
|
990
1116
|
# default, the service returns a single transcript. If you specify a value of `0`,
|
991
1117
|
# the service uses the default value, `1`. See [Maximum
|
992
|
-
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1118
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#max-alternatives).
|
993
1119
|
# @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
|
994
1120
|
# possible word alternative (also known as "Confusion Networks"). An alternative
|
995
1121
|
# word is considered if its confidence is greater than or equal to the threshold.
|
996
1122
|
# Specify a probability between 0.0 and 1.0. By default, the service computes no
|
997
1123
|
# alternative words. See [Word
|
998
|
-
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1124
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-spotting#word-alternatives).
|
999
1125
|
# @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
|
1000
1126
|
# each word. By default, the service returns no word confidence scores. See [Word
|
1001
|
-
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1127
|
+
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-confidence).
|
1002
1128
|
# @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
|
1003
1129
|
# timestamps are returned. See [Word
|
1004
|
-
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1130
|
+
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metadata#word-timestamps).
|
1005
1131
|
# @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
|
1006
1132
|
# results by replacing inappropriate words with a series of asterisks. Set the
|
1007
1133
|
# parameter to `false` to return results with no censoring. Applies to US English
|
1008
|
-
# transcription only. See [Profanity
|
1009
|
-
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1134
|
+
# and Japanese transcription only. See [Profanity
|
1135
|
+
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#profanity-filtering).
|
1010
1136
|
# @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
|
1011
1137
|
# numbers, currency values, and internet addresses into more readable, conventional
|
1012
1138
|
# representations in the final transcript of a recognition request. For US English,
|
@@ -1016,19 +1142,21 @@ module IBMWatson
|
|
1016
1142
|
# **Note:** Applies to US English, Japanese, and Spanish transcription only.
|
1017
1143
|
#
|
1018
1144
|
# See [Smart
|
1019
|
-
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1145
|
+
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#smart-formatting).
|
1020
1146
|
# @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
|
1021
1147
|
# which participants in a multi-person exchange. By default, the service returns no
|
1022
1148
|
# speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
|
1023
1149
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
1024
1150
|
# parameter.
|
1025
|
-
#
|
1026
|
-
#
|
1027
|
-
#
|
1028
|
-
#
|
1029
|
-
#
|
1030
|
-
#
|
1031
|
-
# labels
|
1151
|
+
# * For previous-generation models, can be used for US English, Australian English,
|
1152
|
+
# German, Japanese, Korean, and Spanish (both broadband and narrowband models) and
|
1153
|
+
# UK English (narrowband model) transcription only.
|
1154
|
+
# * For next-generation models, can be used for English (Australian, UK, and US),
|
1155
|
+
# German, and Spanish transcription only.
|
1156
|
+
#
|
1157
|
+
# Restrictions and limitations apply to the use of speaker labels for both types of
|
1158
|
+
# models. See [Speaker
|
1159
|
+
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
|
1032
1160
|
# @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
|
1033
1161
|
# customization ID (GUID) of a custom language model that is to be used with the
|
1034
1162
|
# recognition request. Do not specify both parameters with a request.
|
@@ -1037,7 +1165,8 @@ module IBMWatson
|
|
1037
1165
|
# specify the name of the custom language model for which the grammar is defined.
|
1038
1166
|
# The service recognizes only strings that are recognized by the specified grammar;
|
1039
1167
|
# it does not recognize other custom words from the model's words resource. See
|
1040
|
-
# [
|
1168
|
+
# [Using a grammar for speech
|
1169
|
+
# recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUse).
|
1041
1170
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
1042
1171
|
# feature redacts any number that has three or more consecutive digits by replacing
|
1043
1172
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -1052,7 +1181,7 @@ module IBMWatson
|
|
1052
1181
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
1053
1182
|
#
|
1054
1183
|
# See [Numeric
|
1055
|
-
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1184
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#numeric-redaction).
|
1056
1185
|
# @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
|
1057
1186
|
# input audio. The service returns processing metrics at the interval specified by
|
1058
1187
|
# the `processing_metrics_interval` parameter. It also returns processing metrics
|
@@ -1060,7 +1189,7 @@ module IBMWatson
|
|
1060
1189
|
# the service returns no processing metrics.
|
1061
1190
|
#
|
1062
1191
|
# See [Processing
|
1063
|
-
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#
|
1192
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing-metrics).
|
1064
1193
|
# @param processing_metrics_interval [Float] Specifies the interval in real wall-clock seconds at which the service is to
|
1065
1194
|
# return processing metrics. The parameter is ignored unless the
|
1066
1195
|
# `processing_metrics` parameter is set to `true`.
|
@@ -1074,13 +1203,13 @@ module IBMWatson
|
|
1074
1203
|
# the service returns processing metrics only for transcription events.
|
1075
1204
|
#
|
1076
1205
|
# See [Processing
|
1077
|
-
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#
|
1206
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing-metrics).
|
1078
1207
|
# @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
|
1079
1208
|
# input audio. The service returns audio metrics with the final transcription
|
1080
1209
|
# results. By default, the service returns no audio metrics.
|
1081
1210
|
#
|
1082
1211
|
# See [Audio
|
1083
|
-
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#
|
1212
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio-metrics).
|
1084
1213
|
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
1085
1214
|
# splits a transcript into multiple final results. If the service detects pauses or
|
1086
1215
|
# extended silence before it reaches the end of the audio stream, its response can
|
@@ -1097,7 +1226,7 @@ module IBMWatson
|
|
1097
1226
|
# Chinese is 0.6 seconds.
|
1098
1227
|
#
|
1099
1228
|
# See [End of phrase silence
|
1100
|
-
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1229
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#silence-time).
|
1101
1230
|
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
1102
1231
|
# based on semantic features of the input, for example, at the conclusion of
|
1103
1232
|
# meaningful phrases such as sentences. The service bases its understanding of
|
@@ -1107,7 +1236,7 @@ module IBMWatson
|
|
1107
1236
|
# interval.
|
1108
1237
|
#
|
1109
1238
|
# See [Split transcript at phrase
|
1110
|
-
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1239
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#split-transcript).
|
1111
1240
|
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
1112
1241
|
# the parameter to suppress word insertions from music, coughing, and other
|
1113
1242
|
# non-speech events. The service biases the audio it passes for speech recognition
|
@@ -1119,8 +1248,8 @@ module IBMWatson
|
|
1119
1248
|
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
1120
1249
|
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
1121
1250
|
#
|
1122
|
-
# The values increase on a monotonic curve. See [Speech
|
1123
|
-
#
|
1251
|
+
# The values increase on a monotonic curve. See [Speech detector
|
1252
|
+
# sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity).
|
1124
1253
|
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
1125
1254
|
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
1126
1255
|
# conversations or background noise.
|
@@ -1131,10 +1260,27 @@ module IBMWatson
|
|
1131
1260
|
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
1132
1261
|
# * 1.0 suppresses all audio (no audio is transcribed).
|
1133
1262
|
#
|
1134
|
-
# The values increase on a monotonic curve. See [
|
1135
|
-
#
|
1263
|
+
# The values increase on a monotonic curve. See [Background audio
|
1264
|
+
# suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
|
1265
|
+
# @param low_latency [Boolean] If `true` for next-generation `Multimedia` and `Telephony` models that support low
|
1266
|
+
# latency, directs the service to produce results even more quickly than it usually
|
1267
|
+
# does. Next-generation models produce transcription results faster than
|
1268
|
+
# previous-generation models. The `low_latency` parameter causes the models to
|
1269
|
+
# produce results even more quickly, though the results might be less accurate when
|
1270
|
+
# the parameter is used.
|
1271
|
+
#
|
1272
|
+
# **Note:** The parameter is beta functionality. It is not available for
|
1273
|
+
# previous-generation `Broadband` and `Narrowband` models. It is available only for
|
1274
|
+
# some next-generation models.
|
1275
|
+
#
|
1276
|
+
# * For a list of next-generation models that support low latency, see [Supported
|
1277
|
+
# language
|
1278
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
|
1279
|
+
# for next-generation models.
|
1280
|
+
# * For more information about the `low_latency` parameter, see [Low
|
1281
|
+
# latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
|
1136
1282
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1137
|
-
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
1283
|
+
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil, low_latency: nil)
|
1138
1284
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
1139
1285
|
|
1140
1286
|
headers = {
|
@@ -1173,7 +1319,8 @@ module IBMWatson
|
|
1173
1319
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
1174
1320
|
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
1175
1321
|
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
1176
|
-
"background_audio_suppression" => background_audio_suppression
|
1322
|
+
"background_audio_suppression" => background_audio_suppression,
|
1323
|
+
"low_latency" => low_latency
|
1177
1324
|
}
|
1178
1325
|
|
1179
1326
|
data = audio
|
@@ -1391,9 +1538,12 @@ module IBMWatson
|
|
1391
1538
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
|
1392
1539
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
1393
1540
|
# are to be returned. Omit the parameter to see all custom language or custom
|
1394
|
-
# acoustic models that are owned by the requesting credentials. **Note:** The
|
1395
|
-
# `ar-AR`
|
1396
|
-
#
|
1541
|
+
# acoustic models that are owned by the requesting credentials. (**Note:** The
|
1542
|
+
# identifier `ar-AR` is deprecated; use `ar-MS` instead.)
|
1543
|
+
#
|
1544
|
+
# To determine the languages for which customization is available, see [Language
|
1545
|
+
# support for
|
1546
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
1397
1547
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1398
1548
|
def list_language_models(language: nil)
|
1399
1549
|
headers = {
|
@@ -1544,6 +1694,9 @@ module IBMWatson
|
|
1544
1694
|
# The value that you assign is used for all recognition requests that use the model.
|
1545
1695
|
# You can override it for any recognition request by specifying a customization
|
1546
1696
|
# weight for that request.
|
1697
|
+
#
|
1698
|
+
# See [Using customization
|
1699
|
+
# weight](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageUse#weight).
|
1547
1700
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1548
1701
|
def train_language_model(customization_id:, word_type_to_add: nil, customization_weight: nil)
|
1549
1702
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -1625,7 +1778,7 @@ module IBMWatson
|
|
1625
1778
|
# subsequent requests for the model until the upgrade completes.
|
1626
1779
|
#
|
1627
1780
|
# **See also:** [Upgrading a custom language
|
1628
|
-
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
1781
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade#custom-upgrade-language).
|
1629
1782
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1630
1783
|
# the request. You must make the request with credentials for the instance of the
|
1631
1784
|
# service that owns the custom model.
|
@@ -2464,7 +2617,8 @@ module IBMWatson
|
|
2464
2617
|
# custom model`.
|
2465
2618
|
# @param base_model_name [String] The name of the base language model that is to be customized by the new custom
|
2466
2619
|
# acoustic model. The new custom model can be used only with the base model that it
|
2467
|
-
# customizes.
|
2620
|
+
# customizes. (**Note:** The model `ar-AR_BroadbandModel` is deprecated; use
|
2621
|
+
# `ar-MS_BroadbandModel` instead.)
|
2468
2622
|
#
|
2469
2623
|
# To determine whether a base model supports acoustic model customization, refer to
|
2470
2624
|
# [Language support for
|
@@ -2513,9 +2667,12 @@ module IBMWatson
|
|
2513
2667
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
|
2514
2668
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
2515
2669
|
# are to be returned. Omit the parameter to see all custom language or custom
|
2516
|
-
# acoustic models that are owned by the requesting credentials. **Note:** The
|
2517
|
-
# `ar-AR`
|
2518
|
-
#
|
2670
|
+
# acoustic models that are owned by the requesting credentials. (**Note:** The
|
2671
|
+
# identifier `ar-AR` is deprecated; use `ar-MS` instead.)
|
2672
|
+
#
|
2673
|
+
# To determine the languages for which customization is available, see [Language
|
2674
|
+
# support for
|
2675
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
2519
2676
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2520
2677
|
def list_acoustic_models(language: nil)
|
2521
2678
|
headers = {
|
@@ -2613,14 +2770,14 @@ module IBMWatson
|
|
2613
2770
|
# it. You must use credentials for the instance of the service that owns a model to
|
2614
2771
|
# train it.
|
2615
2772
|
#
|
2616
|
-
# The training method is asynchronous.
|
2617
|
-
#
|
2618
|
-
#
|
2619
|
-
# training a custom acoustic model takes
|
2620
|
-
#
|
2621
|
-
#
|
2622
|
-
# returns an HTTP 200 response code to indicate that the training
|
2623
|
-
#
|
2773
|
+
# The training method is asynchronous. Training time depends on the cumulative
|
2774
|
+
# amount of audio data that the custom acoustic model contains and the current load
|
2775
|
+
# on the service. When you train or retrain a model, the service uses all of the
|
2776
|
+
# model's audio data in the training. Training a custom acoustic model takes
|
2777
|
+
# approximately as long as the length of its cumulative audio data. For example, it
|
2778
|
+
# takes approximately 2 hours to train a model that contains a total of 2 hours of
|
2779
|
+
# audio. The method returns an HTTP 200 response code to indicate that the training
|
2780
|
+
# process has begun.
|
2624
2781
|
#
|
2625
2782
|
# You can monitor the status of the training by using the **Get a custom acoustic
|
2626
2783
|
# model** method to poll the model's status. Use a loop to check the status once a
|
@@ -2765,7 +2922,7 @@ module IBMWatson
|
|
2765
2922
|
# acoustic model was not trained with a custom language model.
|
2766
2923
|
#
|
2767
2924
|
# **See also:** [Upgrading a custom acoustic
|
2768
|
-
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
2925
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade#custom-upgrade-acoustic).
|
2769
2926
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2770
2927
|
# the request. You must make the request with credentials for the instance of the
|
2771
2928
|
# service that owns the custom model.
|
@@ -2779,7 +2936,7 @@ module IBMWatson
|
|
2779
2936
|
# upgrade of a custom acoustic model that is trained with a custom language model,
|
2780
2937
|
# and only if you receive a 400 response code and the message `No input data
|
2781
2938
|
# modified since last training`. See [Upgrading a custom acoustic
|
2782
|
-
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-
|
2939
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade#custom-upgrade-acoustic).
|
2783
2940
|
# @return [nil]
|
2784
2941
|
def upgrade_acoustic_model(customization_id:, custom_language_model_id: nil, force: nil)
|
2785
2942
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2917,8 +3074,8 @@ module IBMWatson
|
|
2917
3074
|
# If the sampling rate of the audio is lower than the minimum required rate, the
|
2918
3075
|
# service labels the audio file as `invalid`.
|
2919
3076
|
#
|
2920
|
-
# **See also:** [
|
2921
|
-
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats
|
3077
|
+
# **See also:** [Supported audio
|
3078
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats).
|
2922
3079
|
#
|
2923
3080
|
#
|
2924
3081
|
# ### Content types for archive-type resources
|
@@ -2964,7 +3121,7 @@ module IBMWatson
|
|
2964
3121
|
# used, their use is strongly discouraged.)
|
2965
3122
|
# * Do not use the name of an audio resource that has already been added to the
|
2966
3123
|
# custom model.
|
2967
|
-
# @param audio_resource [
|
3124
|
+
# @param audio_resource [File] The audio resource that is to be added to the custom acoustic model, an individual
|
2968
3125
|
# audio file or an archive file.
|
2969
3126
|
#
|
2970
3127
|
# With the `curl` command, use the `--data-binary` option to upload the file for the
|