ibm_watson 1.2.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +33 -5
- data/lib/ibm_watson/assistant_v1.rb +153 -209
- data/lib/ibm_watson/assistant_v2.rb +168 -15
- data/lib/ibm_watson/compare_comply_v1.rb +11 -5
- data/lib/ibm_watson/discovery_v1.rb +14 -8
- data/lib/ibm_watson/discovery_v2.rb +605 -12
- data/lib/ibm_watson/language_translator_v3.rb +166 -47
- data/lib/ibm_watson/natural_language_classifier_v1.rb +10 -4
- data/lib/ibm_watson/natural_language_understanding_v1.rb +19 -15
- data/lib/ibm_watson/personality_insights_v3.rb +17 -11
- data/lib/ibm_watson/speech_to_text_v1.rb +323 -195
- data/lib/ibm_watson/text_to_speech_v1.rb +75 -59
- data/lib/ibm_watson/tone_analyzer_v3.rb +11 -5
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +11 -5
- data/lib/ibm_watson/visual_recognition_v4.rb +199 -4
- data/test/integration/test_assistant_v2.rb +25 -0
- data/test/integration/test_compare_comply_v1.rb +1 -12
- data/test/integration/test_discovery_v2.rb +118 -6
- data/test/integration/test_language_translator_v3.rb +5 -0
- data/test/integration/test_speech_to_text_v1.rb +2 -0
- data/test/integration/test_visual_recognition_v4.rb +9 -0
- data/test/unit/test_assistant_v1.rb +98 -98
- data/test/unit/test_assistant_v2.rb +102 -8
- data/test/unit/test_compare_comply_v1.rb +20 -20
- data/test/unit/test_discovery_v1.rb +125 -125
- data/test/unit/test_discovery_v2.rb +262 -29
- data/test/unit/test_language_translator_v3.rb +85 -24
- data/test/unit/test_natural_language_classifier_v1.rb +17 -17
- data/test/unit/test_natural_language_understanding_v1.rb +10 -10
- data/test/unit/test_personality_insights_v3.rb +14 -10
- data/test/unit/test_speech_to_text_v1.rb +97 -97
- data/test/unit/test_text_to_speech_v1.rb +41 -41
- data/test/unit/test_tone_analyzer_v3.rb +12 -12
- data/test/unit/test_visual_recognition_v3.rb +16 -16
- data/test/unit/test_visual_recognition_v4.rb +117 -30
- metadata +5 -6
- data/test/unit/test_vcap_using_personality_insights.rb +0 -161
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# (C) Copyright IBM Corp. 2020.
|
3
|
+
# (C) Copyright IBM Corp. 2018, 2020.
|
4
4
|
#
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
6
|
# you may not use this file except in compliance with the License.
|
@@ -14,12 +14,12 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
|
17
|
-
# The IBM&reg; Speech to Text service provides APIs that use IBM's speech-recognition
|
18
|
-
# capabilities to produce transcripts of spoken audio. The service can transcribe speech
|
19
|
-
# from various languages and audio formats. In addition to basic transcription, the
|
20
|
-
# service can produce detailed information about many different aspects of the audio. For
|
21
|
-
# most languages, the service supports two sampling rates, broadband and narrowband. It
|
22
|
-
# returns all JSON response content in the UTF-8 character set.
|
17
|
+
# The IBM Watson™ Speech to Text service provides APIs that use IBM's
|
18
|
+
# speech-recognition capabilities to produce transcripts of spoken audio. The service can
|
19
|
+
# transcribe speech from various languages and audio formats. In addition to basic
|
20
|
+
# transcription, the service can produce detailed information about many different aspects
|
21
|
+
# of the audio. For most languages, the service supports two sampling rates, broadband and
|
22
|
+
# narrowband. It returns all JSON response content in the UTF-8 character set.
|
23
23
|
#
|
24
24
|
# For speech recognition, the service supports synchronous and asynchronous HTTP
|
25
25
|
# Representational State Transfer (REST) interfaces. It also supports a WebSocket
|
@@ -34,9 +34,9 @@
|
|
34
34
|
# is a formal language specification that lets you restrict the phrases that the service
|
35
35
|
# can recognize.
|
36
36
|
#
|
37
|
-
# Language model customization
|
38
|
-
#
|
39
|
-
#
|
37
|
+
# Language model customization and acoustic model customization are generally available
|
38
|
+
# for production use with all language models that are generally available. Grammars are
|
39
|
+
# beta functionality for all language models that support language model customization.
|
40
40
|
|
41
41
|
require "concurrent"
|
42
42
|
require "erb"
|
@@ -50,6 +50,8 @@ module IBMWatson
|
|
50
50
|
# The Speech to Text V1 service.
|
51
51
|
class SpeechToTextV1 < IBMCloudSdkCore::BaseService
|
52
52
|
include Concurrent::Async
|
53
|
+
DEFAULT_SERVICE_NAME = "speech_to_text"
|
54
|
+
DEFAULT_SERVICE_URL = "https://api.us-south.speech-to-text.watson.cloud.ibm.com"
|
53
55
|
##
|
54
56
|
# @!method initialize(args)
|
55
57
|
# Construct a new client for the Speech to Text service.
|
@@ -58,15 +60,19 @@ module IBMWatson
|
|
58
60
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
59
61
|
# The base service_url may differ between IBM Cloud regions.
|
60
62
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
63
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
64
|
+
# any external configuration, if applicable.
|
61
65
|
def initialize(args = {})
|
62
66
|
@__async_initialized__ = false
|
63
67
|
defaults = {}
|
64
|
-
defaults[:service_url] =
|
68
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
69
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
65
70
|
defaults[:authenticator] = nil
|
71
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
66
72
|
args = defaults.merge(args)
|
67
|
-
args[:service_name] = "speech_to_text"
|
68
73
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
69
74
|
super
|
75
|
+
@service_url = user_service_url unless user_service_url.nil?
|
70
76
|
end
|
71
77
|
|
72
78
|
#########################
|
@@ -78,10 +84,11 @@ module IBMWatson
|
|
78
84
|
# List models.
|
79
85
|
# Lists all language models that are available for use with the service. The
|
80
86
|
# information includes the name of the model and its minimum sampling rate in Hertz,
|
81
|
-
# among other things.
|
87
|
+
# among other things. The ordering of the list of models can change from call to
|
88
|
+
# call; do not rely on an alphabetized or static list of models.
|
82
89
|
#
|
83
90
|
# **See also:** [Languages and
|
84
|
-
# models](https://cloud.ibm.com/docs/
|
91
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
85
92
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
86
93
|
def list_models
|
87
94
|
headers = {
|
@@ -108,7 +115,7 @@ module IBMWatson
|
|
108
115
|
# sampling rate in Hertz, among other things.
|
109
116
|
#
|
110
117
|
# **See also:** [Languages and
|
111
|
-
# models](https://cloud.ibm.com/docs/
|
118
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
112
119
|
# @param model_id [String] The identifier of the model in the form of its name from the output of the **Get a
|
113
120
|
# model** method.
|
114
121
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
@@ -135,7 +142,7 @@ module IBMWatson
|
|
135
142
|
#########################
|
136
143
|
|
137
144
|
##
|
138
|
-
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
145
|
+
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
139
146
|
# Recognize audio.
|
140
147
|
# Sends audio and returns transcription results for a recognition request. You can
|
141
148
|
# pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
|
@@ -146,7 +153,7 @@ module IBMWatson
|
|
146
153
|
# upload the file for the request.)
|
147
154
|
#
|
148
155
|
# **See also:** [Making a basic HTTP
|
149
|
-
# request](https://cloud.ibm.com/docs/
|
156
|
+
# request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-basic).
|
150
157
|
#
|
151
158
|
#
|
152
159
|
# ### Streaming mode
|
@@ -161,9 +168,9 @@ module IBMWatson
|
|
161
168
|
#
|
162
169
|
# **See also:**
|
163
170
|
# * [Audio
|
164
|
-
# transmission](https://cloud.ibm.com/docs/
|
171
|
+
# transmission](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#transmission)
|
165
172
|
# *
|
166
|
-
# [Timeouts](https://cloud.ibm.com/docs/
|
173
|
+
# [Timeouts](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts)
|
167
174
|
#
|
168
175
|
#
|
169
176
|
# ### Audio formats (content types)
|
@@ -203,7 +210,7 @@ module IBMWatson
|
|
203
210
|
# fails.
|
204
211
|
#
|
205
212
|
# **See also:** [Audio
|
206
|
-
# formats](https://cloud.ibm.com/docs/
|
213
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
207
214
|
#
|
208
215
|
#
|
209
216
|
# ### Multipart speech recognition
|
@@ -222,19 +229,19 @@ module IBMWatson
|
|
222
229
|
# want to spot a very large number of keywords.
|
223
230
|
#
|
224
231
|
# **See also:** [Making a multipart HTTP
|
225
|
-
# request](https://cloud.ibm.com/docs/
|
232
|
+
# request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi).
|
226
233
|
# @param audio [String] The audio to transcribe.
|
227
234
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
228
235
|
# audio format, see **Audio formats (content types)** in the method description.
|
229
236
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
230
237
|
# [Languages and
|
231
|
-
# models](https://cloud.ibm.com/docs/
|
238
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
232
239
|
# @param language_customization_id [String] The customization ID (GUID) of a custom language model that is to be used with the
|
233
240
|
# recognition request. The base model of the specified custom language model must
|
234
241
|
# match the model specified with the `model` parameter. You must make the request
|
235
242
|
# with credentials for the instance of the service that owns the custom model. By
|
236
243
|
# default, no custom language model is used. See [Custom
|
237
|
-
# models](https://cloud.ibm.com/docs/
|
244
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
238
245
|
#
|
239
246
|
#
|
240
247
|
# **Note:** Use this parameter instead of the deprecated `customization_id`
|
@@ -244,13 +251,13 @@ module IBMWatson
|
|
244
251
|
# match the model specified with the `model` parameter. You must make the request
|
245
252
|
# with credentials for the instance of the service that owns the custom model. By
|
246
253
|
# default, no custom acoustic model is used. See [Custom
|
247
|
-
# models](https://cloud.ibm.com/docs/
|
254
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
248
255
|
# @param base_model_version [String] The version of the specified base model that is to be used with the recognition
|
249
256
|
# request. Multiple versions of a base model can exist when a model is updated for
|
250
257
|
# internal improvements. The parameter is intended primarily for use with custom
|
251
258
|
# models that have been upgraded for a new base model. The default value depends on
|
252
259
|
# whether the parameter is used with or without a custom model. See [Base model
|
253
|
-
# version](https://cloud.ibm.com/docs/
|
260
|
+
# version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
254
261
|
# @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
|
255
262
|
# recognition request, the customization weight tells the service how much weight to
|
256
263
|
# give to words from the custom language model compared to those from the base model
|
@@ -268,45 +275,51 @@ module IBMWatson
|
|
268
275
|
# phrases.
|
269
276
|
#
|
270
277
|
# See [Custom
|
271
|
-
# models](https://cloud.ibm.com/docs/
|
278
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
272
279
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in
|
273
280
|
# streaming audio, the connection is closed with a 400 error. The parameter is
|
274
281
|
# useful for stopping audio submission from a live microphone when a user simply
|
275
282
|
# walks away. Use `-1` for infinity. See [Inactivity
|
276
|
-
# timeout](https://cloud.ibm.com/docs/
|
283
|
+
# timeout](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts-inactivity).
|
277
284
|
# @param keywords [Array<String>] An array of keyword strings to spot in the audio. Each keyword string can include
|
278
285
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
279
286
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
280
|
-
# threshold.
|
281
|
-
#
|
282
|
-
#
|
287
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
288
|
+
# keywords.
|
289
|
+
#
|
290
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
291
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
292
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
293
|
+
#
|
294
|
+
# See [Keyword
|
295
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
283
296
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
284
297
|
# considered to match a keyword if its confidence is greater than or equal to the
|
285
298
|
# threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
|
286
299
|
# you must also specify one or more keywords. The service performs no keyword
|
287
300
|
# spotting if you omit either parameter. See [Keyword
|
288
|
-
# spotting](https://cloud.ibm.com/docs/
|
301
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
289
302
|
# @param max_alternatives [Integer] The maximum number of alternative transcripts that the service is to return. By
|
290
303
|
# default, the service returns a single transcript. If you specify a value of `0`,
|
291
304
|
# the service uses the default value, `1`. See [Maximum
|
292
|
-
# alternatives](https://cloud.ibm.com/docs/
|
305
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#max_alternatives).
|
293
306
|
# @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
|
294
307
|
# possible word alternative (also known as "Confusion Networks"). An alternative
|
295
308
|
# word is considered if its confidence is greater than or equal to the threshold.
|
296
309
|
# Specify a probability between 0.0 and 1.0. By default, the service computes no
|
297
310
|
# alternative words. See [Word
|
298
|
-
# alternatives](https://cloud.ibm.com/docs/
|
311
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_alternatives).
|
299
312
|
# @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
|
300
313
|
# each word. By default, the service returns no word confidence scores. See [Word
|
301
|
-
# confidence](https://cloud.ibm.com/docs/
|
314
|
+
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_confidence).
|
302
315
|
# @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
|
303
316
|
# timestamps are returned. See [Word
|
304
|
-
# timestamps](https://cloud.ibm.com/docs/
|
317
|
+
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_timestamps).
|
305
318
|
# @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
|
306
319
|
# results by replacing inappropriate words with a series of asterisks. Set the
|
307
320
|
# parameter to `false` to return results with no censoring. Applies to US English
|
308
321
|
# transcription only. See [Profanity
|
309
|
-
# filtering](https://cloud.ibm.com/docs/
|
322
|
+
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#profanity_filter).
|
310
323
|
# @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
|
311
324
|
# numbers, currency values, and internet addresses into more readable, conventional
|
312
325
|
# representations in the final transcript of a recognition request. For US English,
|
@@ -316,21 +329,19 @@ module IBMWatson
|
|
316
329
|
# **Note:** Applies to US English, Japanese, and Spanish transcription only.
|
317
330
|
#
|
318
331
|
# See [Smart
|
319
|
-
# formatting](https://cloud.ibm.com/docs/
|
332
|
+
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#smart_formatting).
|
320
333
|
# @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
|
321
334
|
# which participants in a multi-person exchange. By default, the service returns no
|
322
335
|
# speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
|
323
336
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
324
337
|
# parameter.
|
325
338
|
#
|
326
|
-
# **Note:** Applies to US English,
|
327
|
-
# narrowband models) and UK English (narrowband model)
|
328
|
-
#
|
329
|
-
# **Get a model** method and check that the attribute `speaker_labels` is set to
|
330
|
-
# `true`.
|
339
|
+
# **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
|
340
|
+
# Spanish (both broadband and narrowband models) and UK English (narrowband model)
|
341
|
+
# transcription only.
|
331
342
|
#
|
332
343
|
# See [Speaker
|
333
|
-
# labels](https://cloud.ibm.com/docs/
|
344
|
+
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
334
345
|
# @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
|
335
346
|
# customization ID (GUID) of a custom language model that is to be used with the
|
336
347
|
# recognition request. Do not specify both parameters with a request.
|
@@ -339,7 +350,7 @@ module IBMWatson
|
|
339
350
|
# specify the name of the custom language model for which the grammar is defined.
|
340
351
|
# The service recognizes only strings that are recognized by the specified grammar;
|
341
352
|
# it does not recognize other custom words from the model's words resource. See
|
342
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
353
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
|
343
354
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
344
355
|
# feature redacts any number that has three or more consecutive digits by replacing
|
345
356
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -354,13 +365,13 @@ module IBMWatson
|
|
354
365
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
355
366
|
#
|
356
367
|
# See [Numeric
|
357
|
-
# redaction](https://cloud.ibm.com/docs/
|
368
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
358
369
|
# @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
|
359
370
|
# input audio. The service returns audio metrics with the final transcription
|
360
371
|
# results. By default, the service returns no audio metrics.
|
361
372
|
#
|
362
373
|
# See [Audio
|
363
|
-
# metrics](https://cloud.ibm.com/docs/
|
374
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
364
375
|
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
365
376
|
# splits a transcript into multiple final results. If the service detects pauses or
|
366
377
|
# extended silence before it reaches the end of the audio stream, its response can
|
@@ -377,7 +388,7 @@ module IBMWatson
|
|
377
388
|
# Chinese is 0.6 seconds.
|
378
389
|
#
|
379
390
|
# See [End of phrase silence
|
380
|
-
# time](https://cloud.ibm.com/docs/
|
391
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
381
392
|
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
382
393
|
# based on semantic features of the input, for example, at the conclusion of
|
383
394
|
# meaningful phrases such as sentences. The service bases its understanding of
|
@@ -387,9 +398,34 @@ module IBMWatson
|
|
387
398
|
# interval.
|
388
399
|
#
|
389
400
|
# See [Split transcript at phrase
|
390
|
-
# end](https://cloud.ibm.com/docs/
|
401
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
402
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
403
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
404
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
405
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
406
|
+
# activity.
|
407
|
+
#
|
408
|
+
# Specify a value between 0.0 and 1.0:
|
409
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
410
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
411
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
412
|
+
#
|
413
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
414
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
415
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
416
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
417
|
+
# conversations or background noise.
|
418
|
+
#
|
419
|
+
# Specify a value in the range of 0.0 to 1.0:
|
420
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
421
|
+
# disabled).
|
422
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
423
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
424
|
+
#
|
425
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
426
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
391
427
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
392
|
-
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
428
|
+
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
393
429
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
394
430
|
|
395
431
|
headers = {
|
@@ -420,7 +456,9 @@ module IBMWatson
|
|
420
456
|
"redaction" => redaction,
|
421
457
|
"audio_metrics" => audio_metrics,
|
422
458
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
423
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
459
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
460
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
461
|
+
"background_audio_suppression" => background_audio_suppression
|
424
462
|
}
|
425
463
|
|
426
464
|
data = audio
|
@@ -439,7 +477,7 @@ module IBMWatson
|
|
439
477
|
end
|
440
478
|
|
441
479
|
##
|
442
|
-
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
480
|
+
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
443
481
|
# Sends audio for speech recognition using web sockets.
|
444
482
|
# @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
|
445
483
|
# @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
|
@@ -449,7 +487,7 @@ module IBMWatson
|
|
449
487
|
# @param customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
450
488
|
# @param acoustic_customization_id [String] The GUID of a custom acoustic model that is to be used with the request. The base model of the specified custom acoustic model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom acoustic model is used.
|
451
489
|
# @param language_customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
452
|
-
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://
|
490
|
+
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
453
491
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in submitted audio, the connection is closed with a 400 error. Useful for stopping audio submission from a live microphone when a user simply walks away. Use `-1` for infinity.
|
454
492
|
# @param interim_results [Boolean] Send back non-final previews of each "sentence" as it is being processed. These results are ignored in text mode.
|
455
493
|
# @param keywords [Array<String>] Array of keyword strings to spot in the audio. Each keyword string can include one or more tokens. Keywords are spotted only in the final hypothesis, not in interim results. If you specify any keywords, you must also specify a keywords threshold. Omit the parameter or specify an empty array if you do not need to spot keywords.
|
@@ -460,13 +498,13 @@ module IBMWatson
|
|
460
498
|
# @param timestamps [Boolean] If `true`, time alignment for each word is returned.
|
461
499
|
# @param profanity_filter [Boolean] If `true` (the default), filters profanity from all output except for keyword results by replacing inappropriate words with a series of asterisks. Set the parameter to `false` to return results with no censoring. Applies to US English transcription only.
|
462
500
|
# @param smart_formatting [Boolean] If `true`, converts dates, times, series of digits and numbers, phone numbers, currency values, and Internet addresses into more readable, conventional representations in the final transcript of a recognition request. If `false` (the default), no formatting is performed. Applies to US English transcription only.
|
463
|
-
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://
|
501
|
+
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
464
502
|
# @param grammar_name [String] The name of a grammar that is to be used with the recognition request. If you
|
465
503
|
# specify a grammar, you must also use the `language_customization_id` parameter to
|
466
504
|
# specify the name of the custom language model for which the grammar is defined.
|
467
505
|
# The service recognizes only strings that are recognized by the specified grammar;
|
468
506
|
# it does not recognize other custom words from the model's words resource. See
|
469
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
507
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
|
470
508
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
471
509
|
# feature redacts any number that has three or more consecutive digits by replacing
|
472
510
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -481,7 +519,7 @@ module IBMWatson
|
|
481
519
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
482
520
|
#
|
483
521
|
# See [Numeric
|
484
|
-
# redaction](https://cloud.ibm.com/docs/
|
522
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
485
523
|
#
|
486
524
|
# @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
|
487
525
|
# input audio. The service returns processing metrics at the interval specified by
|
@@ -503,7 +541,7 @@ module IBMWatson
|
|
503
541
|
# @return [WebSocketClient] Returns a new WebSocketClient object
|
504
542
|
#
|
505
543
|
# See [Audio
|
506
|
-
# metrics](https://cloud.ibm.com/docs/
|
544
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
507
545
|
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
508
546
|
# splits a transcript into multiple final results. If the service detects pauses or
|
509
547
|
# extended silence before it reaches the end of the audio stream, its response can
|
@@ -520,7 +558,7 @@ module IBMWatson
|
|
520
558
|
# Chinese is 0.6 seconds.
|
521
559
|
#
|
522
560
|
# See [End of phrase silence
|
523
|
-
# time](https://cloud.ibm.com/docs/
|
561
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
524
562
|
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
525
563
|
# based on semantic features of the input, for example, at the conclusion of
|
526
564
|
# meaningful phrases such as sentences. The service bases its understanding of
|
@@ -530,7 +568,33 @@ module IBMWatson
|
|
530
568
|
# interval.
|
531
569
|
#
|
532
570
|
# See [Split transcript at phrase
|
533
|
-
# end](https://cloud.ibm.com/docs/
|
571
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
572
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
573
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
574
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
575
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
576
|
+
# activity.
|
577
|
+
#
|
578
|
+
# Specify a value between 0.0 and 1.0:
|
579
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
580
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
581
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
582
|
+
#
|
583
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
584
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
585
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
586
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
587
|
+
# conversations or background noise.
|
588
|
+
#
|
589
|
+
# Specify a value in the range of 0.0 to 1.0:
|
590
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
591
|
+
# disabled).
|
592
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
593
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
594
|
+
#
|
595
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
596
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
597
|
+
# @return [WebSocketClient] A new `WebSocketClient` object for the recognition request.
|
534
598
|
def recognize_using_websocket(
|
535
599
|
content_type: nil,
|
536
600
|
recognize_callback:,
|
@@ -559,7 +623,9 @@ module IBMWatson
|
|
559
623
|
processing_metrics_interval: nil,
|
560
624
|
audio_metrics: nil,
|
561
625
|
end_of_phrase_silence_time: nil,
|
562
|
-
split_transcript_at_phrase_end: nil
|
626
|
+
split_transcript_at_phrase_end: nil,
|
627
|
+
speech_detector_sensitivity: nil,
|
628
|
+
background_audio_suppression: nil
|
563
629
|
)
|
564
630
|
raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
|
565
631
|
raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
|
@@ -568,6 +634,7 @@ module IBMWatson
|
|
568
634
|
require_relative("./websocket/speech_to_text_websocket_listener.rb")
|
569
635
|
headers = {}
|
570
636
|
headers = conn.default_options.headers.to_hash unless conn.default_options.headers.to_hash.empty?
|
637
|
+
@authenticator.authenticate(headers)
|
571
638
|
service_url = @service_url.gsub("https:", "wss:")
|
572
639
|
params = {
|
573
640
|
"model" => model,
|
@@ -598,7 +665,9 @@ module IBMWatson
|
|
598
665
|
"processing_metrics_interval" => processing_metrics_interval,
|
599
666
|
"audio_metrics" => audio_metrics,
|
600
667
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
601
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
668
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
669
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
670
|
+
"background_audio_suppression" => background_audio_suppression
|
602
671
|
}
|
603
672
|
options.delete_if { |_, v| v.nil? }
|
604
673
|
WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
|
@@ -611,9 +680,9 @@ module IBMWatson
|
|
611
680
|
# @!method register_callback(callback_url:, user_secret: nil)
|
612
681
|
# Register a callback.
|
613
682
|
# Registers a callback URL with the service for use with subsequent asynchronous
|
614
|
-
# recognition requests. The service attempts to register, or
|
615
|
-
#
|
616
|
-
#
|
683
|
+
# recognition requests. The service attempts to register, or allowlist, the callback
|
684
|
+
# URL if it is not already registered by sending a `GET` request to the callback
|
685
|
+
# URL. The service passes a random alphanumeric challenge string via the
|
617
686
|
# `challenge_string` parameter of the request. The request includes an `Accept`
|
618
687
|
# header that specifies `text/plain` as the required response type.
|
619
688
|
#
|
@@ -625,9 +694,9 @@ module IBMWatson
|
|
625
694
|
#
|
626
695
|
# The service sends only a single `GET` request to the callback URL. If the service
|
627
696
|
# does not receive a reply with a response code of 200 and a body that echoes the
|
628
|
-
# challenge string sent by the service within five seconds, it does not
|
697
|
+
# challenge string sent by the service within five seconds, it does not allowlist
|
629
698
|
# the URL; it instead sends status code 400 in response to the **Register a
|
630
|
-
# callback** request. If the requested callback URL is already
|
699
|
+
# callback** request. If the requested callback URL is already allowlisted, the
|
631
700
|
# service responds to the initial registration request with response code 200.
|
632
701
|
#
|
633
702
|
# If you specify a user secret with the request, the service uses it as a key to
|
@@ -643,9 +712,9 @@ module IBMWatson
|
|
643
712
|
# a one-hour span of time.
|
644
713
|
#
|
645
714
|
# **See also:** [Registering a callback
|
646
|
-
# URL](https://cloud.ibm.com/docs/
|
715
|
+
# URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#register).
|
647
716
|
# @param callback_url [String] An HTTP or HTTPS URL to which callback notifications are to be sent. To be
|
648
|
-
#
|
717
|
+
# allowlisted, the URL must successfully echo the challenge string during URL
|
649
718
|
# verification. During verification, the client can also check the signature that
|
650
719
|
# the service sends in the `X-Callback-Signature` header to verify the origin of the
|
651
720
|
# request.
|
@@ -683,12 +752,12 @@ module IBMWatson
|
|
683
752
|
##
|
684
753
|
# @!method unregister_callback(callback_url:)
|
685
754
|
# Unregister a callback.
|
686
|
-
# Unregisters a callback URL that was previously
|
755
|
+
# Unregisters a callback URL that was previously allowlisted with a **Register a
|
687
756
|
# callback** request for use with the asynchronous interface. Once unregistered, the
|
688
757
|
# URL can no longer be used with asynchronous recognition requests.
|
689
758
|
#
|
690
759
|
# **See also:** [Unregistering a callback
|
691
|
-
# URL](https://cloud.ibm.com/docs/
|
760
|
+
# URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#unregister).
|
692
761
|
# @param callback_url [String] The callback URL that is to be unregistered.
|
693
762
|
# @return [nil]
|
694
763
|
def unregister_callback(callback_url:)
|
@@ -716,7 +785,7 @@ module IBMWatson
|
|
716
785
|
end
|
717
786
|
|
718
787
|
##
|
719
|
-
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
788
|
+
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
720
789
|
# Create a job.
|
721
790
|
# Creates a job for a new asynchronous recognition request. The job is owned by the
|
722
791
|
# instance of the service whose credentials are used to create it. How you learn the
|
@@ -756,7 +825,7 @@ module IBMWatson
|
|
756
825
|
# option to upload the file for the request.)
|
757
826
|
#
|
758
827
|
# **See also:** [Creating a
|
759
|
-
# job](https://cloud.ibm.com/docs/
|
828
|
+
# job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#create).
|
760
829
|
#
|
761
830
|
#
|
762
831
|
# ### Streaming mode
|
@@ -771,9 +840,9 @@ module IBMWatson
|
|
771
840
|
#
|
772
841
|
# **See also:**
|
773
842
|
# * [Audio
|
774
|
-
# transmission](https://cloud.ibm.com/docs/
|
843
|
+
# transmission](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#transmission)
|
775
844
|
# *
|
776
|
-
# [Timeouts](https://cloud.ibm.com/docs/
|
845
|
+
# [Timeouts](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts)
|
777
846
|
#
|
778
847
|
#
|
779
848
|
# ### Audio formats (content types)
|
@@ -813,15 +882,15 @@ module IBMWatson
|
|
813
882
|
# fails.
|
814
883
|
#
|
815
884
|
# **See also:** [Audio
|
816
|
-
# formats](https://cloud.ibm.com/docs/
|
885
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
817
886
|
# @param audio [String] The audio to transcribe.
|
818
887
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
819
888
|
# audio format, see **Audio formats (content types)** in the method description.
|
820
889
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
821
890
|
# [Languages and
|
822
|
-
# models](https://cloud.ibm.com/docs/
|
891
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
823
892
|
# @param callback_url [String] A URL to which callback notifications are to be sent. The URL must already be
|
824
|
-
# successfully
|
893
|
+
# successfully allowlisted by using the **Register a callback** method. You can
|
825
894
|
# include the same callback URL with any number of job creation requests. Omit the
|
826
895
|
# parameter to poll the service for job completion and results.
|
827
896
|
#
|
@@ -859,7 +928,7 @@ module IBMWatson
|
|
859
928
|
# match the model specified with the `model` parameter. You must make the request
|
860
929
|
# with credentials for the instance of the service that owns the custom model. By
|
861
930
|
# default, no custom language model is used. See [Custom
|
862
|
-
# models](https://cloud.ibm.com/docs/
|
931
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
863
932
|
#
|
864
933
|
#
|
865
934
|
# **Note:** Use this parameter instead of the deprecated `customization_id`
|
@@ -869,13 +938,13 @@ module IBMWatson
|
|
869
938
|
# match the model specified with the `model` parameter. You must make the request
|
870
939
|
# with credentials for the instance of the service that owns the custom model. By
|
871
940
|
# default, no custom acoustic model is used. See [Custom
|
872
|
-
# models](https://cloud.ibm.com/docs/
|
941
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
873
942
|
# @param base_model_version [String] The version of the specified base model that is to be used with the recognition
|
874
943
|
# request. Multiple versions of a base model can exist when a model is updated for
|
875
944
|
# internal improvements. The parameter is intended primarily for use with custom
|
876
945
|
# models that have been upgraded for a new base model. The default value depends on
|
877
946
|
# whether the parameter is used with or without a custom model. See [Base model
|
878
|
-
# version](https://cloud.ibm.com/docs/
|
947
|
+
# version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
879
948
|
# @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
|
880
949
|
# recognition request, the customization weight tells the service how much weight to
|
881
950
|
# give to words from the custom language model compared to those from the base model
|
@@ -893,45 +962,51 @@ module IBMWatson
|
|
893
962
|
# phrases.
|
894
963
|
#
|
895
964
|
# See [Custom
|
896
|
-
# models](https://cloud.ibm.com/docs/
|
965
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
897
966
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in
|
898
967
|
# streaming audio, the connection is closed with a 400 error. The parameter is
|
899
968
|
# useful for stopping audio submission from a live microphone when a user simply
|
900
969
|
# walks away. Use `-1` for infinity. See [Inactivity
|
901
|
-
# timeout](https://cloud.ibm.com/docs/
|
970
|
+
# timeout](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts-inactivity).
|
902
971
|
# @param keywords [Array<String>] An array of keyword strings to spot in the audio. Each keyword string can include
|
903
972
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
904
973
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
905
|
-
# threshold.
|
906
|
-
#
|
907
|
-
#
|
974
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
975
|
+
# keywords.
|
976
|
+
#
|
977
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
978
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
979
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
980
|
+
#
|
981
|
+
# See [Keyword
|
982
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
908
983
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
909
984
|
# considered to match a keyword if its confidence is greater than or equal to the
|
910
985
|
# threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
|
911
986
|
# you must also specify one or more keywords. The service performs no keyword
|
912
987
|
# spotting if you omit either parameter. See [Keyword
|
913
|
-
# spotting](https://cloud.ibm.com/docs/
|
988
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
914
989
|
# @param max_alternatives [Integer] The maximum number of alternative transcripts that the service is to return. By
|
915
990
|
# default, the service returns a single transcript. If you specify a value of `0`,
|
916
991
|
# the service uses the default value, `1`. See [Maximum
|
917
|
-
# alternatives](https://cloud.ibm.com/docs/
|
992
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#max_alternatives).
|
918
993
|
# @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
|
919
994
|
# possible word alternative (also known as "Confusion Networks"). An alternative
|
920
995
|
# word is considered if its confidence is greater than or equal to the threshold.
|
921
996
|
# Specify a probability between 0.0 and 1.0. By default, the service computes no
|
922
997
|
# alternative words. See [Word
|
923
|
-
# alternatives](https://cloud.ibm.com/docs/
|
998
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_alternatives).
|
924
999
|
# @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
|
925
1000
|
# each word. By default, the service returns no word confidence scores. See [Word
|
926
|
-
# confidence](https://cloud.ibm.com/docs/
|
1001
|
+
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_confidence).
|
927
1002
|
# @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
|
928
1003
|
# timestamps are returned. See [Word
|
929
|
-
# timestamps](https://cloud.ibm.com/docs/
|
1004
|
+
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_timestamps).
|
930
1005
|
# @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
|
931
1006
|
# results by replacing inappropriate words with a series of asterisks. Set the
|
932
1007
|
# parameter to `false` to return results with no censoring. Applies to US English
|
933
1008
|
# transcription only. See [Profanity
|
934
|
-
# filtering](https://cloud.ibm.com/docs/
|
1009
|
+
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#profanity_filter).
|
935
1010
|
# @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
|
936
1011
|
# numbers, currency values, and internet addresses into more readable, conventional
|
937
1012
|
# representations in the final transcript of a recognition request. For US English,
|
@@ -941,21 +1016,19 @@ module IBMWatson
|
|
941
1016
|
# **Note:** Applies to US English, Japanese, and Spanish transcription only.
|
942
1017
|
#
|
943
1018
|
# See [Smart
|
944
|
-
# formatting](https://cloud.ibm.com/docs/
|
1019
|
+
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#smart_formatting).
|
945
1020
|
# @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
|
946
1021
|
# which participants in a multi-person exchange. By default, the service returns no
|
947
1022
|
# speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
|
948
1023
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
949
1024
|
# parameter.
|
950
1025
|
#
|
951
|
-
# **Note:** Applies to US English,
|
952
|
-
# narrowband models) and UK English (narrowband model)
|
953
|
-
#
|
954
|
-
# **Get a model** method and check that the attribute `speaker_labels` is set to
|
955
|
-
# `true`.
|
1026
|
+
# **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
|
1027
|
+
# Spanish (both broadband and narrowband models) and UK English (narrowband model)
|
1028
|
+
# transcription only.
|
956
1029
|
#
|
957
1030
|
# See [Speaker
|
958
|
-
# labels](https://cloud.ibm.com/docs/
|
1031
|
+
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
959
1032
|
# @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
|
960
1033
|
# customization ID (GUID) of a custom language model that is to be used with the
|
961
1034
|
# recognition request. Do not specify both parameters with a request.
|
@@ -964,7 +1037,7 @@ module IBMWatson
|
|
964
1037
|
# specify the name of the custom language model for which the grammar is defined.
|
965
1038
|
# The service recognizes only strings that are recognized by the specified grammar;
|
966
1039
|
# it does not recognize other custom words from the model's words resource. See
|
967
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
1040
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
|
968
1041
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
969
1042
|
# feature redacts any number that has three or more consecutive digits by replacing
|
970
1043
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -979,7 +1052,7 @@ module IBMWatson
|
|
979
1052
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
980
1053
|
#
|
981
1054
|
# See [Numeric
|
982
|
-
# redaction](https://cloud.ibm.com/docs/
|
1055
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
983
1056
|
# @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
|
984
1057
|
# input audio. The service returns processing metrics at the interval specified by
|
985
1058
|
# the `processing_metrics_interval` parameter. It also returns processing metrics
|
@@ -987,7 +1060,7 @@ module IBMWatson
|
|
987
1060
|
# the service returns no processing metrics.
|
988
1061
|
#
|
989
1062
|
# See [Processing
|
990
|
-
# metrics](https://cloud.ibm.com/docs/
|
1063
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
|
991
1064
|
# @param processing_metrics_interval [Float] Specifies the interval in real wall-clock seconds at which the service is to
|
992
1065
|
# return processing metrics. The parameter is ignored unless the
|
993
1066
|
# `processing_metrics` parameter is set to `true`.
|
@@ -1001,13 +1074,13 @@ module IBMWatson
|
|
1001
1074
|
# the service returns processing metrics only for transcription events.
|
1002
1075
|
#
|
1003
1076
|
# See [Processing
|
1004
|
-
# metrics](https://cloud.ibm.com/docs/
|
1077
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
|
1005
1078
|
# @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
|
1006
1079
|
# input audio. The service returns audio metrics with the final transcription
|
1007
1080
|
# results. By default, the service returns no audio metrics.
|
1008
1081
|
#
|
1009
1082
|
# See [Audio
|
1010
|
-
# metrics](https://cloud.ibm.com/docs/
|
1083
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
1011
1084
|
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
1012
1085
|
# splits a transcript into multiple final results. If the service detects pauses or
|
1013
1086
|
# extended silence before it reaches the end of the audio stream, its response can
|
@@ -1024,7 +1097,7 @@ module IBMWatson
|
|
1024
1097
|
# Chinese is 0.6 seconds.
|
1025
1098
|
#
|
1026
1099
|
# See [End of phrase silence
|
1027
|
-
# time](https://cloud.ibm.com/docs/
|
1100
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
1028
1101
|
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
1029
1102
|
# based on semantic features of the input, for example, at the conclusion of
|
1030
1103
|
# meaningful phrases such as sentences. The service bases its understanding of
|
@@ -1034,9 +1107,34 @@ module IBMWatson
|
|
1034
1107
|
# interval.
|
1035
1108
|
#
|
1036
1109
|
# See [Split transcript at phrase
|
1037
|
-
# end](https://cloud.ibm.com/docs/
|
1110
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
1111
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
1112
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
1113
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
1114
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
1115
|
+
# activity.
|
1116
|
+
#
|
1117
|
+
# Specify a value between 0.0 and 1.0:
|
1118
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
1119
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
1120
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
1121
|
+
#
|
1122
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1123
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
1124
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
1125
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
1126
|
+
# conversations or background noise.
|
1127
|
+
#
|
1128
|
+
# Specify a value in the range of 0.0 to 1.0:
|
1129
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
1130
|
+
# disabled).
|
1131
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
1132
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
1133
|
+
#
|
1134
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1135
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
1038
1136
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1039
|
-
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
1137
|
+
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
1040
1138
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
1041
1139
|
|
1042
1140
|
headers = {
|
@@ -1073,7 +1171,9 @@ module IBMWatson
|
|
1073
1171
|
"processing_metrics_interval" => processing_metrics_interval,
|
1074
1172
|
"audio_metrics" => audio_metrics,
|
1075
1173
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
1076
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
1174
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
1175
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
1176
|
+
"background_audio_suppression" => background_audio_suppression
|
1077
1177
|
}
|
1078
1178
|
|
1079
1179
|
data = audio
|
@@ -1104,7 +1204,7 @@ module IBMWatson
|
|
1104
1204
|
# first.
|
1105
1205
|
#
|
1106
1206
|
# **See also:** [Checking the status of the latest
|
1107
|
-
# jobs](https://cloud.ibm.com/docs/
|
1207
|
+
# jobs](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#jobs).
|
1108
1208
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1109
1209
|
def check_jobs
|
1110
1210
|
headers = {
|
@@ -1139,7 +1239,7 @@ module IBMWatson
|
|
1139
1239
|
# recent jobs associated with the calling credentials.
|
1140
1240
|
#
|
1141
1241
|
# **See also:** [Checking the status and retrieving the results of a
|
1142
|
-
# job](https://cloud.ibm.com/docs/
|
1242
|
+
# job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#job).
|
1143
1243
|
# @param id [String] The identifier of the asynchronous job that is to be used for the request. You
|
1144
1244
|
# must make the request with credentials for the instance of the service that owns
|
1145
1245
|
# the job.
|
@@ -1173,7 +1273,7 @@ module IBMWatson
|
|
1173
1273
|
# owns a job to delete it.
|
1174
1274
|
#
|
1175
1275
|
# **See also:** [Deleting a
|
1176
|
-
# job](https://cloud.ibm.com/docs/
|
1276
|
+
# job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#delete-async).
|
1177
1277
|
# @param id [String] The identifier of the asynchronous job that is to be used for the request. You
|
1178
1278
|
# must make the request with credentials for the instance of the service that owns
|
1179
1279
|
# the job.
|
@@ -1214,7 +1314,7 @@ module IBMWatson
|
|
1214
1314
|
# below the limit.
|
1215
1315
|
#
|
1216
1316
|
# **See also:** [Create a custom language
|
1217
|
-
# model](https://cloud.ibm.com/docs/
|
1317
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#createModel-language).
|
1218
1318
|
# @param name [String] A user-defined name for the new custom language model. Use a name that is unique
|
1219
1319
|
# among all custom language models that you own. Use a localized name that matches
|
1220
1320
|
# the language of the custom model. Use a name that describes the domain of the
|
@@ -1226,7 +1326,7 @@ module IBMWatson
|
|
1226
1326
|
# To determine whether a base model supports language model customization, use the
|
1227
1327
|
# **Get a model** method and check that the attribute `custom_language_model` is set
|
1228
1328
|
# to `true`. You can also refer to [Language support for
|
1229
|
-
# customization](https://cloud.ibm.com/docs/
|
1329
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
1230
1330
|
# @param dialect [String] The dialect of the specified language that is to be used with the custom language
|
1231
1331
|
# model. For most languages, the dialect matches the language of the base model by
|
1232
1332
|
# default. For example, `en-US` is used for either of the US English language
|
@@ -1288,10 +1388,12 @@ module IBMWatson
|
|
1288
1388
|
# a model to list information about it.
|
1289
1389
|
#
|
1290
1390
|
# **See also:** [Listing custom language
|
1291
|
-
# models](https://cloud.ibm.com/docs/
|
1391
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
|
1292
1392
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
1293
|
-
# are to be returned
|
1294
|
-
#
|
1393
|
+
# are to be returned. Omit the parameter to see all custom language or custom
|
1394
|
+
# acoustic models that are owned by the requesting credentials. **Note:** The
|
1395
|
+
# `ar-AR` (Modern Standard Arabic) and `zh-CN` (Mandarin Chinese) languages are not
|
1396
|
+
# available for language model customization.
|
1295
1397
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1296
1398
|
def list_language_models(language: nil)
|
1297
1399
|
headers = {
|
@@ -1322,7 +1424,7 @@ module IBMWatson
|
|
1322
1424
|
# for the instance of the service that owns a model to list information about it.
|
1323
1425
|
#
|
1324
1426
|
# **See also:** [Listing custom language
|
1325
|
-
# models](https://cloud.ibm.com/docs/
|
1427
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
|
1326
1428
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1327
1429
|
# the request. You must make the request with credentials for the instance of the
|
1328
1430
|
# service that owns the custom model.
|
@@ -1355,7 +1457,7 @@ module IBMWatson
|
|
1355
1457
|
# owns a model to delete it.
|
1356
1458
|
#
|
1357
1459
|
# **See also:** [Deleting a custom language
|
1358
|
-
# model](https://cloud.ibm.com/docs/
|
1460
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#deleteModel-language).
|
1359
1461
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1360
1462
|
# the request. You must make the request with credentials for the instance of the
|
1361
1463
|
# service that owns the custom model.
|
@@ -1403,7 +1505,7 @@ module IBMWatson
|
|
1403
1505
|
# requests to add new resources until the existing request completes.
|
1404
1506
|
#
|
1405
1507
|
# **See also:** [Train the custom language
|
1406
|
-
# model](https://cloud.ibm.com/docs/
|
1508
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#trainModel-language).
|
1407
1509
|
#
|
1408
1510
|
#
|
1409
1511
|
# ### Training failures
|
@@ -1479,7 +1581,7 @@ module IBMWatson
|
|
1479
1581
|
# it.
|
1480
1582
|
#
|
1481
1583
|
# **See also:** [Resetting a custom language
|
1482
|
-
# model](https://cloud.ibm.com/docs/
|
1584
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#resetModel-language).
|
1483
1585
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1484
1586
|
# the request. You must make the request with credentials for the instance of the
|
1485
1587
|
# service that owns the custom model.
|
@@ -1523,7 +1625,7 @@ module IBMWatson
|
|
1523
1625
|
# subsequent requests for the model until the upgrade completes.
|
1524
1626
|
#
|
1525
1627
|
# **See also:** [Upgrading a custom language
|
1526
|
-
# model](https://cloud.ibm.com/docs/
|
1628
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeLanguage).
|
1527
1629
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1528
1630
|
# the request. You must make the request with credentials for the instance of the
|
1529
1631
|
# service that owns the custom model.
|
@@ -1559,7 +1661,7 @@ module IBMWatson
|
|
1559
1661
|
# that owns a model to list its corpora.
|
1560
1662
|
#
|
1561
1663
|
# **See also:** [Listing corpora for a custom language
|
1562
|
-
# model](https://cloud.ibm.com/docs/
|
1664
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#listCorpora).
|
1563
1665
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1564
1666
|
# the request. You must make the request with credentials for the instance of the
|
1565
1667
|
# service that owns the custom model.
|
@@ -1599,18 +1701,20 @@ module IBMWatson
|
|
1599
1701
|
#
|
1600
1702
|
# The call returns an HTTP 201 response code if the corpus is valid. The service
|
1601
1703
|
# then asynchronously processes the contents of the corpus and automatically
|
1602
|
-
# extracts new words that it finds. This can take on the order of
|
1603
|
-
# complete depending on the total number of words and the number of new words in
|
1604
|
-
# corpus, as well as the current load on the service. You cannot submit requests
|
1605
|
-
# add additional resources to the custom model or to train the model until the
|
1704
|
+
# extracts new words that it finds. This operation can take on the order of minutes
|
1705
|
+
# to complete depending on the total number of words and the number of new words in
|
1706
|
+
# the corpus, as well as the current load on the service. You cannot submit requests
|
1707
|
+
# to add additional resources to the custom model or to train the model until the
|
1606
1708
|
# service's analysis of the corpus for the current request completes. Use the **List
|
1607
1709
|
# a corpus** method to check the status of the analysis.
|
1608
1710
|
#
|
1609
1711
|
# The service auto-populates the model's words resource with words from the corpus
|
1610
|
-
# that are not found in its base vocabulary. These are referred to as
|
1611
|
-
# out-of-vocabulary (OOV) words.
|
1612
|
-
#
|
1613
|
-
#
|
1712
|
+
# that are not found in its base vocabulary. These words are referred to as
|
1713
|
+
# out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
|
1714
|
+
# resource to ensure that each OOV word's definition is complete and valid. You can
|
1715
|
+
# use the **List custom words** method to examine the words resource. You can use
|
1716
|
+
# other words methods to eliminate typos and modify how words are pronounced as
|
1717
|
+
# needed.
|
1614
1718
|
#
|
1615
1719
|
# To add a corpus file that has the same name as an existing corpus, set the
|
1616
1720
|
# `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
|
@@ -1627,10 +1731,12 @@ module IBMWatson
|
|
1627
1731
|
# directly.
|
1628
1732
|
#
|
1629
1733
|
# **See also:**
|
1630
|
-
# * [Working with
|
1631
|
-
# corpora](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
|
1632
1734
|
# * [Add a corpus to the custom language
|
1633
|
-
# model](https://cloud.ibm.com/docs/
|
1735
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
|
1736
|
+
# * [Working with
|
1737
|
+
# corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
|
1738
|
+
# * [Validating a words
|
1739
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1634
1740
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1635
1741
|
# the request. You must make the request with credentials for the instance of the
|
1636
1742
|
# service that owns the custom model.
|
@@ -1656,7 +1762,7 @@ module IBMWatson
|
|
1656
1762
|
# Make sure that you know the character encoding of the file. You must use that
|
1657
1763
|
# encoding when working with the words in the custom language model. For more
|
1658
1764
|
# information, see [Character
|
1659
|
-
# encoding](https://cloud.ibm.com/docs/
|
1765
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
1660
1766
|
#
|
1661
1767
|
#
|
1662
1768
|
# With the `curl` command, use the `--data-binary` option to upload the file for the
|
@@ -1710,7 +1816,7 @@ module IBMWatson
|
|
1710
1816
|
# that owns a model to list its corpora.
|
1711
1817
|
#
|
1712
1818
|
# **See also:** [Listing corpora for a custom language
|
1713
|
-
# model](https://cloud.ibm.com/docs/
|
1819
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#listCorpora).
|
1714
1820
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1715
1821
|
# the request. You must make the request with credentials for the instance of the
|
1716
1822
|
# service that owns the custom model.
|
@@ -1750,7 +1856,7 @@ module IBMWatson
|
|
1750
1856
|
# corpora.
|
1751
1857
|
#
|
1752
1858
|
# **See also:** [Deleting a corpus from a custom language
|
1753
|
-
# model](https://cloud.ibm.com/docs/
|
1859
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#deleteCorpus).
|
1754
1860
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1755
1861
|
# the request. You must make the request with credentials for the instance of the
|
1756
1862
|
# service that owns the custom model.
|
@@ -1792,7 +1898,7 @@ module IBMWatson
|
|
1792
1898
|
# service that owns a model to list information about its words.
|
1793
1899
|
#
|
1794
1900
|
# **See also:** [Listing words from a custom language
|
1795
|
-
# model](https://cloud.ibm.com/docs/
|
1901
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#listWords).
|
1796
1902
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1797
1903
|
# the request. You must make the request with credentials for the instance of the
|
1798
1904
|
# service that owns the custom model.
|
@@ -1859,7 +1965,10 @@ module IBMWatson
|
|
1859
1965
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1860
1966
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1861
1967
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1862
|
-
# word.
|
1968
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
1969
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
1970
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
1971
|
+
# valid.
|
1863
1972
|
# * The `display_as` field provides a different way of spelling the word in a
|
1864
1973
|
# transcript. Use the parameter when you want the word to appear different from its
|
1865
1974
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1889,10 +1998,12 @@ module IBMWatson
|
|
1889
1998
|
#
|
1890
1999
|
#
|
1891
2000
|
# **See also:**
|
1892
|
-
# * [Working with custom
|
1893
|
-
# words](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1894
2001
|
# * [Add words to the custom language
|
1895
|
-
# model](https://cloud.ibm.com/docs/
|
2002
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
2003
|
+
# * [Working with custom
|
2004
|
+
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
2005
|
+
# * [Validating a words
|
2006
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1896
2007
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1897
2008
|
# the request. You must make the request with credentials for the instance of the
|
1898
2009
|
# service that owns the custom model.
|
@@ -1948,7 +2059,10 @@ module IBMWatson
|
|
1948
2059
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1949
2060
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1950
2061
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1951
|
-
# word.
|
2062
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
2063
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
2064
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
2065
|
+
# valid.
|
1952
2066
|
# * The `display_as` field provides a different way of spelling the word in a
|
1953
2067
|
# transcript. Use the parameter when you want the word to appear different from its
|
1954
2068
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1960,10 +2074,12 @@ module IBMWatson
|
|
1960
2074
|
# the **List a custom word** method to review the word that you add.
|
1961
2075
|
#
|
1962
2076
|
# **See also:**
|
1963
|
-
# * [Working with custom
|
1964
|
-
# words](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1965
2077
|
# * [Add words to the custom language
|
1966
|
-
# model](https://cloud.ibm.com/docs/
|
2078
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
2079
|
+
# * [Working with custom
|
2080
|
+
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
2081
|
+
# * [Validating a words
|
2082
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1967
2083
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1968
2084
|
# the request. You must make the request with credentials for the instance of the
|
1969
2085
|
# service that owns the custom model.
|
@@ -1971,7 +2087,7 @@ module IBMWatson
|
|
1971
2087
|
# not include spaces in the word. Use a `-` (dash) or `_` (underscore) to connect
|
1972
2088
|
# the tokens of compound words. URL-encode the word if it includes non-ASCII
|
1973
2089
|
# characters. For more information, see [Character
|
1974
|
-
# encoding](https://cloud.ibm.com/docs/
|
2090
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
1975
2091
|
# @param word [String] For the **Add custom words** method, you must specify the custom word that is to
|
1976
2092
|
# be added to or updated in the custom model. Do not include spaces in the word. Use
|
1977
2093
|
# a `-` (dash) or `_` (underscore) to connect the tokens of compound words.
|
@@ -2029,13 +2145,13 @@ module IBMWatson
|
|
2029
2145
|
# about its words.
|
2030
2146
|
#
|
2031
2147
|
# **See also:** [Listing words from a custom language
|
2032
|
-
# model](https://cloud.ibm.com/docs/
|
2148
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#listWords).
|
2033
2149
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2034
2150
|
# the request. You must make the request with credentials for the instance of the
|
2035
2151
|
# service that owns the custom model.
|
2036
2152
|
# @param word_name [String] The custom word that is to be read from the custom language model. URL-encode the
|
2037
2153
|
# word if it includes non-ASCII characters. For more information, see [Character
|
2038
|
-
# encoding](https://cloud.ibm.com/docs/
|
2154
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
2039
2155
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2040
2156
|
def get_word(customization_id:, word_name:)
|
2041
2157
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2070,13 +2186,13 @@ module IBMWatson
|
|
2070
2186
|
# instance of the service that owns a model to delete its words.
|
2071
2187
|
#
|
2072
2188
|
# **See also:** [Deleting a word from a custom language
|
2073
|
-
# model](https://cloud.ibm.com/docs/
|
2189
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#deleteWord).
|
2074
2190
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2075
2191
|
# the request. You must make the request with credentials for the instance of the
|
2076
2192
|
# service that owns the custom model.
|
2077
2193
|
# @param word_name [String] The custom word that is to be deleted from the custom language model. URL-encode
|
2078
2194
|
# the word if it includes non-ASCII characters. For more information, see [Character
|
2079
|
-
# encoding](https://cloud.ibm.com/docs/
|
2195
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
2080
2196
|
# @return [nil]
|
2081
2197
|
def delete_word(customization_id:, word_name:)
|
2082
2198
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2111,7 +2227,7 @@ module IBMWatson
|
|
2111
2227
|
# model to list its grammars.
|
2112
2228
|
#
|
2113
2229
|
# **See also:** [Listing grammars from a custom language
|
2114
|
-
# model](https://cloud.ibm.com/docs/
|
2230
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#listGrammars).
|
2115
2231
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2116
2232
|
# the request. You must make the request with credentials for the instance of the
|
2117
2233
|
# service that owns the custom model.
|
@@ -2147,12 +2263,12 @@ module IBMWatson
|
|
2147
2263
|
#
|
2148
2264
|
# The call returns an HTTP 201 response code if the grammar is valid. The service
|
2149
2265
|
# then asynchronously processes the contents of the grammar and automatically
|
2150
|
-
# extracts new words that it finds. This can take a few seconds
|
2151
|
-
# depending on the size and complexity of the grammar, as well as the
|
2152
|
-
# on the service. You cannot submit requests to add additional
|
2153
|
-
# custom model or to train the model until the service's analysis
|
2154
|
-
# the current request completes. Use the **Get a grammar** method
|
2155
|
-
# status of the analysis.
|
2266
|
+
# extracts new words that it finds. This operation can take a few seconds or minutes
|
2267
|
+
# to complete depending on the size and complexity of the grammar, as well as the
|
2268
|
+
# current load on the service. You cannot submit requests to add additional
|
2269
|
+
# resources to the custom model or to train the model until the service's analysis
|
2270
|
+
# of the grammar for the current request completes. Use the **Get a grammar** method
|
2271
|
+
# to check the status of the analysis.
|
2156
2272
|
#
|
2157
2273
|
# The service populates the model's words resource with any word that is recognized
|
2158
2274
|
# by the grammar that is not found in the model's base vocabulary. These are
|
@@ -2175,9 +2291,9 @@ module IBMWatson
|
|
2175
2291
|
#
|
2176
2292
|
# **See also:**
|
2177
2293
|
# * [Understanding
|
2178
|
-
# grammars](https://cloud.ibm.com/docs/
|
2294
|
+
# grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUnderstand#grammarUnderstand)
|
2179
2295
|
# * [Add a grammar to the custom language
|
2180
|
-
# model](https://cloud.ibm.com/docs/
|
2296
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarAdd#addGrammar).
|
2181
2297
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2182
2298
|
# the request. You must make the request with credentials for the instance of the
|
2183
2299
|
# service that owns the custom model.
|
@@ -2257,7 +2373,7 @@ module IBMWatson
|
|
2257
2373
|
# model to list its grammars.
|
2258
2374
|
#
|
2259
2375
|
# **See also:** [Listing grammars from a custom language
|
2260
|
-
# model](https://cloud.ibm.com/docs/
|
2376
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#listGrammars).
|
2261
2377
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2262
2378
|
# the request. You must make the request with credentials for the instance of the
|
2263
2379
|
# service that owns the custom model.
|
@@ -2296,7 +2412,7 @@ module IBMWatson
|
|
2296
2412
|
# for the instance of the service that owns a model to delete its grammar.
|
2297
2413
|
#
|
2298
2414
|
# **See also:** [Deleting a grammar from a custom language
|
2299
|
-
# model](https://cloud.ibm.com/docs/
|
2415
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#deleteGrammar).
|
2300
2416
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2301
2417
|
# the request. You must make the request with credentials for the instance of the
|
2302
2418
|
# service that owns the custom model.
|
@@ -2340,7 +2456,7 @@ module IBMWatson
|
|
2340
2456
|
# below the limit.
|
2341
2457
|
#
|
2342
2458
|
# **See also:** [Create a custom acoustic
|
2343
|
-
# model](https://cloud.ibm.com/docs/
|
2459
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#createModel-acoustic).
|
2344
2460
|
# @param name [String] A user-defined name for the new custom acoustic model. Use a name that is unique
|
2345
2461
|
# among all custom acoustic models that you own. Use a localized name that matches
|
2346
2462
|
# the language of the custom model. Use a name that describes the acoustic
|
@@ -2352,7 +2468,7 @@ module IBMWatson
|
|
2352
2468
|
#
|
2353
2469
|
# To determine whether a base model supports acoustic model customization, refer to
|
2354
2470
|
# [Language support for
|
2355
|
-
# customization](https://cloud.ibm.com/docs/
|
2471
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
2356
2472
|
# @param description [String] A description of the new custom acoustic model. Use a localized description that
|
2357
2473
|
# matches the language of the custom model.
|
2358
2474
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
@@ -2394,10 +2510,12 @@ module IBMWatson
|
|
2394
2510
|
# a model to list information about it.
|
2395
2511
|
#
|
2396
2512
|
# **See also:** [Listing custom acoustic
|
2397
|
-
# models](https://cloud.ibm.com/docs/
|
2513
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
|
2398
2514
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
2399
|
-
# are to be returned
|
2400
|
-
#
|
2515
|
+
# are to be returned. Omit the parameter to see all custom language or custom
|
2516
|
+
# acoustic models that are owned by the requesting credentials. **Note:** The
|
2517
|
+
# `ar-AR` (Modern Standard Arabic) and `zh-CN` (Mandarin Chinese) languages are not
|
2518
|
+
# available for language model customization.
|
2401
2519
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2402
2520
|
def list_acoustic_models(language: nil)
|
2403
2521
|
headers = {
|
@@ -2428,7 +2546,7 @@ module IBMWatson
|
|
2428
2546
|
# for the instance of the service that owns a model to list information about it.
|
2429
2547
|
#
|
2430
2548
|
# **See also:** [Listing custom acoustic
|
2431
|
-
# models](https://cloud.ibm.com/docs/
|
2549
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
|
2432
2550
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2433
2551
|
# the request. You must make the request with credentials for the instance of the
|
2434
2552
|
# service that owns the custom model.
|
@@ -2461,7 +2579,7 @@ module IBMWatson
|
|
2461
2579
|
# model to delete it.
|
2462
2580
|
#
|
2463
2581
|
# **See also:** [Deleting a custom acoustic
|
2464
|
-
# model](https://cloud.ibm.com/docs/
|
2582
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#deleteModel-acoustic).
|
2465
2583
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2466
2584
|
# the request. You must make the request with credentials for the instance of the
|
2467
2585
|
# service that owns the custom model.
|
@@ -2499,7 +2617,7 @@ module IBMWatson
|
|
2499
2617
|
# to complete depending on the total amount of audio data on which the custom
|
2500
2618
|
# acoustic model is being trained and the current load on the service. Typically,
|
2501
2619
|
# training a custom acoustic model takes approximately two to four times the length
|
2502
|
-
# of its audio data. The
|
2620
|
+
# of its audio data. The actual time depends on the model being trained and the
|
2503
2621
|
# nature of the audio, such as whether the audio is clean or noisy. The method
|
2504
2622
|
# returns an HTTP 200 response code to indicate that the training process has begun.
|
2505
2623
|
#
|
@@ -2518,14 +2636,15 @@ module IBMWatson
|
|
2518
2636
|
# Train with a custom language model if you have verbatim transcriptions of the
|
2519
2637
|
# audio files that you have added to the custom model or you have either corpora
|
2520
2638
|
# (text files) or a list of words that are relevant to the contents of the audio
|
2521
|
-
# files.
|
2522
|
-
# base model
|
2639
|
+
# files. For training to succeed, both of the custom models must be based on the
|
2640
|
+
# same version of the same base model, and the custom language model must be fully
|
2641
|
+
# trained and available.
|
2523
2642
|
#
|
2524
2643
|
# **See also:**
|
2525
2644
|
# * [Train the custom acoustic
|
2526
|
-
# model](https://cloud.ibm.com/docs/
|
2645
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#trainModel-acoustic)
|
2527
2646
|
# * [Using custom acoustic and custom language models
|
2528
|
-
# together](https://cloud.ibm.com/docs/
|
2647
|
+
# together](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-useBoth#useBoth)
|
2529
2648
|
#
|
2530
2649
|
#
|
2531
2650
|
# ### Training failures
|
@@ -2535,6 +2654,9 @@ module IBMWatson
|
|
2535
2654
|
# another training request or a request to add audio resources to the model.
|
2536
2655
|
# * The custom model contains less than 10 minutes or more than 200 hours of audio
|
2537
2656
|
# data.
|
2657
|
+
# * You passed a custom language model with the `custom_language_model_id` query
|
2658
|
+
# parameter that is not in the available state. A custom language model must be
|
2659
|
+
# fully trained and available to be used to train a custom acoustic model.
|
2538
2660
|
# * You passed an incompatible custom language model with the
|
2539
2661
|
# `custom_language_model_id` query parameter. Both custom models must be based on
|
2540
2662
|
# the same version of the same base model.
|
@@ -2550,8 +2672,8 @@ module IBMWatson
|
|
2550
2672
|
# been trained with verbatim transcriptions of the audio resources or that contains
|
2551
2673
|
# words that are relevant to the contents of the audio resources. The custom
|
2552
2674
|
# language model must be based on the same version of the same base model as the
|
2553
|
-
# custom acoustic model. The credentials specified with the request must own both
|
2554
|
-
# custom models.
|
2675
|
+
# custom acoustic model, and the custom language model must be fully trained and
|
2676
|
+
# available. The credentials specified with the request must own both custom models.
|
2555
2677
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2556
2678
|
def train_acoustic_model(customization_id:, custom_language_model_id: nil)
|
2557
2679
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2590,7 +2712,7 @@ module IBMWatson
|
|
2590
2712
|
# owns a model to reset it.
|
2591
2713
|
#
|
2592
2714
|
# **See also:** [Resetting a custom acoustic
|
2593
|
-
# model](https://cloud.ibm.com/docs/
|
2715
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#resetModel-acoustic).
|
2594
2716
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2595
2717
|
# the request. You must make the request with credentials for the instance of the
|
2596
2718
|
# service that owns the custom model.
|
@@ -2643,20 +2765,21 @@ module IBMWatson
|
|
2643
2765
|
# acoustic model was not trained with a custom language model.
|
2644
2766
|
#
|
2645
2767
|
# **See also:** [Upgrading a custom acoustic
|
2646
|
-
# model](https://cloud.ibm.com/docs/
|
2768
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeAcoustic).
|
2647
2769
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2648
2770
|
# the request. You must make the request with credentials for the instance of the
|
2649
2771
|
# service that owns the custom model.
|
2650
2772
|
# @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
|
2651
2773
|
# customization ID (GUID) of that custom language model. The custom language model
|
2652
|
-
# must be upgraded before the custom acoustic model can be upgraded. The
|
2653
|
-
# credentials specified with the request must own both custom models.
|
2774
|
+
# must be upgraded before the custom acoustic model can be upgraded. The custom
|
2775
|
+
# language model must be fully trained and available. The credentials specified with
|
2776
|
+
# the request must own both custom models.
|
2654
2777
|
# @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
|
2655
2778
|
# has been modified since it was last trained. Use this parameter only to force the
|
2656
2779
|
# upgrade of a custom acoustic model that is trained with a custom language model,
|
2657
2780
|
# and only if you receive a 400 response code and the message `No input data
|
2658
2781
|
# modified since last training`. See [Upgrading a custom acoustic
|
2659
|
-
# model](https://cloud.ibm.com/docs/
|
2782
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeAcoustic).
|
2660
2783
|
# @return [nil]
|
2661
2784
|
def upgrade_acoustic_model(customization_id:, custom_language_model_id: nil, force: nil)
|
2662
2785
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2697,7 +2820,7 @@ module IBMWatson
|
|
2697
2820
|
# the instance of the service that owns a model to list its audio resources.
|
2698
2821
|
#
|
2699
2822
|
# **See also:** [Listing audio resources for a custom acoustic
|
2700
|
-
# model](https://cloud.ibm.com/docs/
|
2823
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#listAudio).
|
2701
2824
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2702
2825
|
# the request. You must make the request with credentials for the instance of the
|
2703
2826
|
# service that owns the custom model.
|
@@ -2745,14 +2868,14 @@ module IBMWatson
|
|
2745
2868
|
# same name as an existing audio resource, set the `allow_overwrite` parameter to
|
2746
2869
|
# `true`; otherwise, the request fails.
|
2747
2870
|
#
|
2748
|
-
# The method is asynchronous. It can take several seconds to complete depending on
|
2749
|
-
# the duration of the audio and, in the case of an archive file, the total number of
|
2750
|
-
# audio files being processed. The service returns a 201 response code if the audio
|
2751
|
-
# is valid. It then asynchronously analyzes the contents of the audio file or files
|
2752
|
-
# and automatically extracts information about the audio such as its length,
|
2753
|
-
# sampling rate, and encoding. You cannot submit requests to train or upgrade the
|
2754
|
-
# model until the service's analysis of all audio resources for current requests
|
2755
|
-
# completes.
|
2871
|
+
# The method is asynchronous. It can take several seconds or minutes to complete
|
2872
|
+
# depending on the duration of the audio and, in the case of an archive file, the
|
2873
|
+
# total number of audio files being processed. The service returns a 201 response
|
2874
|
+
# code if the audio is valid. It then asynchronously analyzes the contents of the
|
2875
|
+
# audio file or files and automatically extracts information about the audio such as
|
2876
|
+
# its length, sampling rate, and encoding. You cannot submit requests to train or
|
2877
|
+
# upgrade the model until the service's analysis of all audio resources for current
|
2878
|
+
# requests completes.
|
2756
2879
|
#
|
2757
2880
|
# To determine the status of the service's analysis of the audio, use the **Get an
|
2758
2881
|
# audio resource** method to poll the status of the audio. The method accepts the
|
@@ -2761,7 +2884,7 @@ module IBMWatson
|
|
2761
2884
|
# every few seconds until it becomes `ok`.
|
2762
2885
|
#
|
2763
2886
|
# **See also:** [Add audio to the custom acoustic
|
2764
|
-
# model](https://cloud.ibm.com/docs/
|
2887
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#addAudio).
|
2765
2888
|
#
|
2766
2889
|
#
|
2767
2890
|
# ### Content types for audio-type resources
|
@@ -2795,7 +2918,7 @@ module IBMWatson
|
|
2795
2918
|
# service labels the audio file as `invalid`.
|
2796
2919
|
#
|
2797
2920
|
# **See also:** [Audio
|
2798
|
-
# formats](https://cloud.ibm.com/docs/
|
2921
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
2799
2922
|
#
|
2800
2923
|
#
|
2801
2924
|
# ### Content types for archive-type resources
|
@@ -2931,7 +3054,7 @@ module IBMWatson
|
|
2931
3054
|
# its audio resources.
|
2932
3055
|
#
|
2933
3056
|
# **See also:** [Listing audio resources for a custom acoustic
|
2934
|
-
# model](https://cloud.ibm.com/docs/
|
3057
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#listAudio).
|
2935
3058
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2936
3059
|
# the request. You must make the request with credentials for the instance of the
|
2937
3060
|
# service that owns the custom model.
|
@@ -2972,7 +3095,7 @@ module IBMWatson
|
|
2972
3095
|
# service that owns a model to delete its audio resources.
|
2973
3096
|
#
|
2974
3097
|
# **See also:** [Deleting an audio resource from a custom acoustic
|
2975
|
-
# model](https://cloud.ibm.com/docs/
|
3098
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#deleteAudio).
|
2976
3099
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2977
3100
|
# the request. You must make the request with credentials for the instance of the
|
2978
3101
|
# service that owns the custom model.
|
@@ -3009,13 +3132,18 @@ module IBMWatson
|
|
3009
3132
|
# deletes all data for the customer ID, regardless of the method by which the
|
3010
3133
|
# information was added. The method has no effect if no data is associated with the
|
3011
3134
|
# customer ID. You must issue the request with credentials for the same instance of
|
3012
|
-
# the service that was used to associate the customer ID with the data.
|
3135
|
+
# the service that was used to associate the customer ID with the data. You
|
3136
|
+
# associate a customer ID with data by passing the `X-Watson-Metadata` header with a
|
3137
|
+
# request that passes the data.
|
3013
3138
|
#
|
3014
|
-
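# You associate a customer ID with data by passing the `X-Watson-Metadata` header
|
3015
|
-
# with a request that passes the data.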
|
3139
|
+
# **Note:** If you delete an instance of the service from the service console, all
|
3140
|
+
# data associated with that service instance is automatically deleted. This includes
|
3141
|
+
# all custom language models, corpora, grammars, and words; all custom acoustic
|
3142
|
+
# models and audio resources; all registered endpoints for the asynchronous HTTP
|
3143
|
+
# interface; and all data related to speech recognition requests.
|
3016
3144
|
#
|
3017
3145
|
# **See also:** [Information
|
3018
|
-
# security](https://cloud.ibm.com/docs/
|
3146
|
+
# security](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-information-security#information-security).
|
3019
3147
|
# @param customer_id [String] The customer ID for which all data is to be deleted.
|
3020
3148
|
# @return [nil]
|
3021
3149
|
def delete_user_data(customer_id:)
|