ibm_watson 1.3.0 → 2.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +33 -5
- data/lib/ibm_watson/assistant_v1.rb +225 -199
- data/lib/ibm_watson/assistant_v2.rb +228 -21
- data/lib/ibm_watson/compare_comply_v1.rb +43 -24
- data/lib/ibm_watson/discovery_v1.rb +144 -19
- data/lib/ibm_watson/discovery_v2.rb +742 -23
- data/lib/ibm_watson/language_translator_v3.rb +216 -64
- data/lib/ibm_watson/natural_language_classifier_v1.rb +11 -3
- data/lib/ibm_watson/natural_language_understanding_v1.rb +32 -26
- data/lib/ibm_watson/personality_insights_v3.rb +22 -14
- data/lib/ibm_watson/speech_to_text_v1.rb +240 -106
- data/lib/ibm_watson/text_to_speech_v1.rb +139 -146
- data/lib/ibm_watson/tone_analyzer_v3.rb +19 -14
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +31 -14
- data/lib/ibm_watson/visual_recognition_v4.rb +112 -22
- data/test/integration/test_assistant_v1.rb +9 -0
- data/test/integration/test_assistant_v2.rb +34 -0
- data/test/integration/test_compare_comply_v1.rb +1 -12
- data/test/integration/test_discovery_v2.rb +118 -6
- data/test/integration/test_language_translator_v3.rb +5 -0
- data/test/integration/test_speech_to_text_v1.rb +2 -0
- data/test/integration/test_text_to_speech_v1.rb +3 -3
- data/test/integration/test_visual_recognition_v4.rb +9 -0
- data/test/unit/test_assistant_v1.rb +149 -98
- data/test/unit/test_assistant_v2.rb +153 -8
- data/test/unit/test_compare_comply_v1.rb +20 -20
- data/test/unit/test_discovery_v1.rb +125 -125
- data/test/unit/test_discovery_v2.rb +262 -29
- data/test/unit/test_language_translator_v3.rb +85 -24
- data/test/unit/test_natural_language_classifier_v1.rb +17 -17
- data/test/unit/test_natural_language_understanding_v1.rb +10 -10
- data/test/unit/test_personality_insights_v3.rb +14 -14
- data/test/unit/test_speech_to_text_v1.rb +97 -97
- data/test/unit/test_text_to_speech_v1.rb +48 -48
- data/test/unit/test_tone_analyzer_v3.rb +12 -12
- data/test/unit/test_visual_recognition_v3.rb +16 -16
- data/test/unit/test_visual_recognition_v4.rb +56 -38
- metadata +5 -5
@@ -13,14 +13,16 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.17.0-8d569e8f-20201030-142059
|
18
|
+
#
|
17
19
|
# Analyze various features of text content at scale. Provide text, raw HTML, or a public
|
18
20
|
# URL and IBM Watson Natural Language Understanding will give you results for the features
|
19
21
|
# you request. The service cleans HTML content before analysis by default, so the results
|
20
22
|
# can ignore most advertisements and other unwanted content.
|
21
23
|
#
|
22
24
|
# You can create [custom
|
23
|
-
# models](https://cloud.ibm.com/docs/
|
25
|
+
# models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
|
24
26
|
# with Watson Knowledge Studio to detect custom entities and relations in Natural Language
|
25
27
|
# Understanding.
|
26
28
|
|
@@ -36,37 +38,36 @@ module IBMWatson
|
|
36
38
|
# The Natural Language Understanding V1 service.
|
37
39
|
class NaturalLanguageUnderstandingV1 < IBMCloudSdkCore::BaseService
|
38
40
|
include Concurrent::Async
|
41
|
+
DEFAULT_SERVICE_NAME = "natural_language_understanding"
|
42
|
+
DEFAULT_SERVICE_URL = "https://api.us-south.natural-language-understanding.watson.cloud.ibm.com"
|
43
|
+
attr_accessor :version
|
39
44
|
##
|
40
45
|
# @!method initialize(args)
|
41
46
|
# Construct a new client for the Natural Language Understanding service.
|
42
47
|
#
|
43
48
|
# @param args [Hash] The args to initialize with
|
44
|
-
# @option args version [String]
|
45
|
-
#
|
46
|
-
# incompatible way, a new minor version of the API is released.
|
47
|
-
# The service uses the API version for the date you specify, or
|
48
|
-
# the most recent version before that date. Note that you should
|
49
|
-
# not programmatically specify the current date at runtime, in
|
50
|
-
# case the API has been updated since your application's release.
|
51
|
-
# Instead, specify a version date that is compatible with your
|
52
|
-
# application, and don't change it until your application is
|
53
|
-
# ready for a later version.
|
49
|
+
# @option args version [String] Release date of the API version you want to use. Specify dates in YYYY-MM-DD
|
50
|
+
# format. The current version is `2020-08-01`.
|
54
51
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
55
52
|
# The base service_url may differ between IBM Cloud regions.
|
56
53
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
54
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
55
|
+
# any external configuration, if applicable.
|
57
56
|
def initialize(args = {})
|
58
57
|
@__async_initialized__ = false
|
59
58
|
defaults = {}
|
60
|
-
defaults[:
|
61
|
-
defaults[:
|
59
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
60
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
62
61
|
defaults[:authenticator] = nil
|
62
|
+
defaults[:version] = nil
|
63
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
63
64
|
args = defaults.merge(args)
|
64
65
|
@version = args[:version]
|
65
66
|
raise ArgumentError.new("version must be provided") if @version.nil?
|
66
67
|
|
67
|
-
args[:service_name] = "natural_language_understanding"
|
68
68
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
69
69
|
super
|
70
|
+
@service_url = user_service_url unless user_service_url.nil?
|
70
71
|
end
|
71
72
|
|
72
73
|
#########################
|
@@ -86,11 +87,12 @@ module IBMWatson
|
|
86
87
|
# - Relations
|
87
88
|
# - Semantic roles
|
88
89
|
# - Sentiment
|
89
|
-
# - Syntax
|
90
|
+
# - Syntax
|
91
|
+
# - Summarization (Experimental)
|
90
92
|
#
|
91
93
|
# If a language for the input text is not specified with the `language` parameter,
|
92
94
|
# the service [automatically detects the
|
93
|
-
# language](https://cloud.ibm.com/docs/
|
95
|
+
# language](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-detectable-languages).
|
94
96
|
# @param features [Features] Specific features to analyze the document for.
|
95
97
|
# @param text [String] The plain text to analyze. One of the `text`, `html`, or `url` parameters is
|
96
98
|
# required.
|
@@ -98,12 +100,11 @@ module IBMWatson
|
|
98
100
|
# required.
|
99
101
|
# @param url [String] The webpage to analyze. One of the `text`, `html`, or `url` parameters is
|
100
102
|
# required.
|
101
|
-
# @param clean [Boolean] Set this to `false` to disable webpage cleaning.
|
102
|
-
# cleaning, see
|
103
|
-
# webpages](https://cloud.ibm.com/docs/
|
104
|
-
# documentation.
|
103
|
+
# @param clean [Boolean] Set this to `false` to disable webpage cleaning. For more information about
|
104
|
+
# webpage cleaning, see [Analyzing
|
105
|
+
# webpages](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages).
|
105
106
|
# @param xpath [String] An [XPath
|
106
|
-
# query](https://cloud.ibm.com/docs/
|
107
|
+
# query](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages#xpath)
|
107
108
|
# to perform on `html` or `url` input. Results of the query will be appended to the
|
108
109
|
# cleaned webpage text before it is analyzed. To analyze only the results of the
|
109
110
|
# XPath query, set the `clean` parameter to `false`.
|
@@ -111,12 +112,13 @@ module IBMWatson
|
|
111
112
|
# @param return_analyzed_text [Boolean] Whether or not to return the analyzed text.
|
112
113
|
# @param language [String] ISO 639-1 code that specifies the language of your text. This overrides automatic
|
113
114
|
# language detection. Language support differs depending on the features you include
|
114
|
-
# in your analysis.
|
115
|
-
# support](https://cloud.ibm.com/docs/
|
116
|
-
# for more information.
|
115
|
+
# in your analysis. For more information, see [Language
|
116
|
+
# support](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-language-support).
|
117
117
|
# @param limit_text_characters [Fixnum] Sets the maximum number of characters that are processed by the service.
|
118
118
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
119
119
|
def analyze(features:, text: nil, html: nil, url: nil, clean: nil, xpath: nil, fallback_to_raw: nil, return_analyzed_text: nil, language: nil, limit_text_characters: nil)
|
120
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
121
|
+
|
120
122
|
raise ArgumentError.new("features must be provided") if features.nil?
|
121
123
|
|
122
124
|
headers = {
|
@@ -161,10 +163,12 @@ module IBMWatson
|
|
161
163
|
# @!method list_models
|
162
164
|
# List models.
|
163
165
|
# Lists Watson Knowledge Studio [custom entities and relations
|
164
|
-
# models](https://cloud.ibm.com/docs/
|
166
|
+
# models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
|
165
167
|
# that are deployed to your Natural Language Understanding service.
|
166
168
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
167
169
|
def list_models
|
170
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
171
|
+
|
168
172
|
headers = {
|
169
173
|
}
|
170
174
|
sdk_headers = Common.new.get_sdk_headers("natural-language-understanding", "V1", "list_models")
|
@@ -193,6 +197,8 @@ module IBMWatson
|
|
193
197
|
# @param model_id [String] Model ID of the model to delete.
|
194
198
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
195
199
|
def delete_model(model_id:)
|
200
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
201
|
+
|
196
202
|
raise ArgumentError.new("model_id must be provided") if model_id.nil?
|
197
203
|
|
198
204
|
headers = {
|
@@ -13,7 +13,9 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.17.0-8d569e8f-20201030-142059
|
18
|
+
#
|
17
19
|
# The IBM Watson™ Personality Insights service enables applications to derive
|
18
20
|
# insights from social media, enterprise data, or other digital communications. The
|
19
21
|
# service uses linguistic analytics to infer individuals' intrinsic personality
|
@@ -47,37 +49,41 @@ module IBMWatson
|
|
47
49
|
# The Personality Insights V3 service.
|
48
50
|
class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
|
49
51
|
include Concurrent::Async
|
52
|
+
DEFAULT_SERVICE_NAME = "personality_insights"
|
53
|
+
DEFAULT_SERVICE_URL = "https://api.us-south.personality-insights.watson.cloud.ibm.com"
|
54
|
+
attr_accessor :version
|
50
55
|
##
|
51
56
|
# @!method initialize(args)
|
52
57
|
# Construct a new client for the Personality Insights service.
|
53
58
|
#
|
54
59
|
# @param args [Hash] The args to initialize with
|
55
|
-
# @option args version [String]
|
56
|
-
#
|
57
|
-
# incompatible way, a new minor version of the API is released.
|
58
|
-
# The service uses the API version for the date you specify, or
|
59
|
-
# the most recent version before that date. Note that you should
|
60
|
-
# not programmatically specify the current date at runtime, in
|
61
|
-
# case the API has been updated since your application's release.
|
62
|
-
# Instead, specify a version date that is compatible with your
|
63
|
-
# application, and don't change it until your application is
|
64
|
-
# ready for a later version.
|
60
|
+
# @option args version [String] Release date of the version of the API you want to use. Specify dates in
|
61
|
+
# YYYY-MM-DD format. The current version is `2017-10-13`.
|
65
62
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
66
63
|
# The base service_url may differ between IBM Cloud regions.
|
67
64
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
65
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
66
|
+
# any external configuration, if applicable.
|
67
|
+
#
|
68
|
+
# @deprecated On 1 December 2021, Personality Insights will no longer be available.
|
69
|
+
# Consider migrating to Watson Natural Language Understanding.
|
70
|
+
# For more information, see [Personality Insights Deprecation](https://github.com/watson-developer-cloud/ruby-sdk/tree/master#personality-insights-deprecation).
|
68
71
|
def initialize(args = {})
|
72
|
+
warn "On 1 December 2021, Personality Insights will no longer be available. For more information, see the README."
|
69
73
|
@__async_initialized__ = false
|
70
74
|
defaults = {}
|
71
|
-
defaults[:
|
72
|
-
defaults[:
|
75
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
76
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
73
77
|
defaults[:authenticator] = nil
|
78
|
+
defaults[:version] = nil
|
79
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
74
80
|
args = defaults.merge(args)
|
75
81
|
@version = args[:version]
|
76
82
|
raise ArgumentError.new("version must be provided") if @version.nil?
|
77
83
|
|
78
|
-
args[:service_name] = "personality_insights"
|
79
84
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
80
85
|
super
|
86
|
+
@service_url = user_service_url unless user_service_url.nil?
|
81
87
|
end
|
82
88
|
|
83
89
|
#########################
|
@@ -164,6 +170,8 @@ module IBMWatson
|
|
164
170
|
# default, no consumption preferences are returned.
|
165
171
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
166
172
|
def profile(content:, accept:, content_type: nil, content_language: nil, accept_language: nil, raw_scores: nil, csv_headers: nil, consumption_preferences: nil)
|
173
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
174
|
+
|
167
175
|
raise ArgumentError.new("content must be provided") if content.nil?
|
168
176
|
|
169
177
|
raise ArgumentError.new("accept must be provided") if accept.nil?
|
@@ -13,13 +13,15 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.17.0-8d569e8f-20201030-142059
|
18
|
+
#
|
19
|
+
# The IBM Watson™ Speech to Text service provides APIs that use IBM's
|
20
|
+
# speech-recognition capabilities to produce transcripts of spoken audio. The service can
|
21
|
+
# transcribe speech from various languages and audio formats. In addition to basic
|
22
|
+
# transcription, the service can produce detailed information about many different aspects
|
23
|
+
# of the audio. For most languages, the service supports two sampling rates, broadband and
|
24
|
+
# narrowband. It returns all JSON response content in the UTF-8 character set.
|
23
25
|
#
|
24
26
|
# For speech recognition, the service supports synchronous and asynchronous HTTP
|
25
27
|
# Representational State Transfer (REST) interfaces. It also supports a WebSocket
|
@@ -34,9 +36,9 @@
|
|
34
36
|
# is a formal language specification that lets you restrict the phrases that the service
|
35
37
|
# can recognize.
|
36
38
|
#
|
37
|
-
# Language model customization
|
38
|
-
#
|
39
|
-
#
|
39
|
+
# Language model customization and acoustic model customization are generally available
|
40
|
+
# for production use with all language models that are generally available. Grammars are
|
41
|
+
# beta functionality for all language models that support language model customization.
|
40
42
|
|
41
43
|
require "concurrent"
|
42
44
|
require "erb"
|
@@ -50,6 +52,8 @@ module IBMWatson
|
|
50
52
|
# The Speech to Text V1 service.
|
51
53
|
class SpeechToTextV1 < IBMCloudSdkCore::BaseService
|
52
54
|
include Concurrent::Async
|
55
|
+
DEFAULT_SERVICE_NAME = "speech_to_text"
|
56
|
+
DEFAULT_SERVICE_URL = "https://api.us-south.speech-to-text.watson.cloud.ibm.com"
|
53
57
|
##
|
54
58
|
# @!method initialize(args)
|
55
59
|
# Construct a new client for the Speech to Text service.
|
@@ -58,15 +62,19 @@ module IBMWatson
|
|
58
62
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
59
63
|
# The base service_url may differ between IBM Cloud regions.
|
60
64
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
65
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
66
|
+
# any external configuration, if applicable.
|
61
67
|
def initialize(args = {})
|
62
68
|
@__async_initialized__ = false
|
63
69
|
defaults = {}
|
64
|
-
defaults[:service_url] =
|
70
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
71
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
65
72
|
defaults[:authenticator] = nil
|
73
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
66
74
|
args = defaults.merge(args)
|
67
|
-
args[:service_name] = "speech_to_text"
|
68
75
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
69
76
|
super
|
77
|
+
@service_url = user_service_url unless user_service_url.nil?
|
70
78
|
end
|
71
79
|
|
72
80
|
#########################
|
@@ -78,7 +86,8 @@ module IBMWatson
|
|
78
86
|
# List models.
|
79
87
|
# Lists all language models that are available for use with the service. The
|
80
88
|
# information includes the name of the model and its minimum sampling rate in Hertz,
|
81
|
-
# among other things.
|
89
|
+
# among other things. The ordering of the list of models can change from call to
|
90
|
+
# call; do not rely on an alphabetized or static list of models.
|
82
91
|
#
|
83
92
|
# **See also:** [Languages and
|
84
93
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
@@ -135,7 +144,7 @@ module IBMWatson
|
|
135
144
|
#########################
|
136
145
|
|
137
146
|
##
|
138
|
-
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
147
|
+
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
139
148
|
# Recognize audio.
|
140
149
|
# Sends audio and returns transcription results for a recognition request. You can
|
141
150
|
# pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
|
@@ -223,7 +232,7 @@ module IBMWatson
|
|
223
232
|
#
|
224
233
|
# **See also:** [Making a multipart HTTP
|
225
234
|
# request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi).
|
226
|
-
# @param audio [
|
235
|
+
# @param audio [File] The audio to transcribe.
|
227
236
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
228
237
|
# audio format, see **Audio formats (content types)** in the method description.
|
229
238
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
@@ -277,8 +286,14 @@ module IBMWatson
|
|
277
286
|
# @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
|
278
287
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
279
288
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
280
|
-
# threshold.
|
281
|
-
#
|
289
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
290
|
+
# keywords.
|
291
|
+
#
|
292
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
293
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
294
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
295
|
+
#
|
296
|
+
# See [Keyword
|
282
297
|
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
283
298
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
284
299
|
# considered to match a keyword if its confidence is greater than or equal to the
|
@@ -323,11 +338,9 @@ module IBMWatson
|
|
323
338
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
324
339
|
# parameter.
|
325
340
|
#
|
326
|
-
# **Note:** Applies to US English,
|
327
|
-
# narrowband models) and UK English (narrowband model)
|
328
|
-
#
|
329
|
-
# **Get a model** method and check that the attribute `speaker_labels` is set to
|
330
|
-
# `true`.
|
341
|
+
# **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
|
342
|
+
# Spanish (both broadband and narrowband models) and UK English (narrowband model)
|
343
|
+
# transcription only.
|
331
344
|
#
|
332
345
|
# See [Speaker
|
333
346
|
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
@@ -388,8 +401,33 @@ module IBMWatson
|
|
388
401
|
#
|
389
402
|
# See [Split transcript at phrase
|
390
403
|
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
404
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
405
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
406
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
407
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
408
|
+
# activity.
|
409
|
+
#
|
410
|
+
# Specify a value between 0.0 and 1.0:
|
411
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
412
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
413
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
414
|
+
#
|
415
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
416
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
417
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
418
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
419
|
+
# conversations or background noise.
|
420
|
+
#
|
421
|
+
# Specify a value in the range of 0.0 to 1.0:
|
422
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
423
|
+
# disabled).
|
424
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
425
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
426
|
+
#
|
427
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
428
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
391
429
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
392
|
-
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
430
|
+
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
393
431
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
394
432
|
|
395
433
|
headers = {
|
@@ -420,7 +458,9 @@ module IBMWatson
|
|
420
458
|
"redaction" => redaction,
|
421
459
|
"audio_metrics" => audio_metrics,
|
422
460
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
423
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
461
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
462
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
463
|
+
"background_audio_suppression" => background_audio_suppression
|
424
464
|
}
|
425
465
|
|
426
466
|
data = audio
|
@@ -439,7 +479,7 @@ module IBMWatson
|
|
439
479
|
end
|
440
480
|
|
441
481
|
##
|
442
|
-
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
482
|
+
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
443
483
|
# Sends audio for speech recognition using web sockets.
|
444
484
|
# @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
|
445
485
|
# @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
|
@@ -449,7 +489,7 @@ module IBMWatson
|
|
449
489
|
# @param customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
450
490
|
# @param acoustic_customization_id [String] The GUID of a custom acoustic model that is to be used with the request. The base model of the specified custom acoustic model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom acoustic model is used.
|
451
491
|
# @param language_customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
452
|
-
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://
|
492
|
+
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
453
493
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in submitted audio, the connection is closed with a 400 error. Useful for stopping audio submission from a live microphone when a user simply walks away. Use `-1` for infinity.
|
454
494
|
# @param interim_results [Boolean] Send back non-final previews of each "sentence" as it is being processed. These results are ignored in text mode.
|
455
495
|
# @param keywords [Array<String>] Array of keyword strings to spot in the audio. Each keyword string can include one or more tokens. Keywords are spotted only in the final hypothesis, not in interim results. If you specify any keywords, you must also specify a keywords threshold. Omit the parameter or specify an empty array if you do not need to spot keywords.
|
@@ -460,13 +500,13 @@ module IBMWatson
|
|
460
500
|
# @param timestamps [Boolean] If `true`, time alignment for each word is returned.
|
461
501
|
# @param profanity_filter [Boolean] If `true` (the default), filters profanity from all output except for keyword results by replacing inappropriate words with a series of asterisks. Set the parameter to `false` to return results with no censoring. Applies to US English transcription only.
|
462
502
|
# @param smart_formatting [Boolean] If `true`, converts dates, times, series of digits and numbers, phone numbers, currency values, and Internet addresses into more readable, conventional representations in the final transcript of a recognition request. If `false` (the default), no formatting is performed. Applies to US English transcription only.
|
463
|
-
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://
|
503
|
+
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
464
504
|
# @param grammar_name [String] The name of a grammar that is to be used with the recognition request. If you
|
465
505
|
# specify a grammar, you must also use the `language_customization_id` parameter to
|
466
506
|
# specify the name of the custom language model for which the grammar is defined.
|
467
507
|
# The service recognizes only strings that are recognized by the specified grammar;
|
468
508
|
# it does not recognize other custom words from the model's words resource. See
|
469
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
509
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text/output.html).
|
470
510
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
471
511
|
# feature redacts any number that has three or more consecutive digits by replacing
|
472
512
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -481,7 +521,7 @@ module IBMWatson
|
|
481
521
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
482
522
|
#
|
483
523
|
# See [Numeric
|
484
|
-
# redaction](https://cloud.ibm.com/docs/
|
524
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
485
525
|
#
|
486
526
|
# @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
|
487
527
|
# input audio. The service returns processing metrics at the interval specified by
|
@@ -503,7 +543,7 @@ module IBMWatson
|
|
503
543
|
# @return [WebSocketClient] Returns a new WebSocketClient object
|
504
544
|
#
|
505
545
|
# See [Audio
|
506
|
-
# metrics](https://cloud.ibm.com/docs/
|
546
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
507
547
|
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
508
548
|
# splits a transcript into multiple final results. If the service detects pauses or
|
509
549
|
# extended silence before it reaches the end of the audio stream, its response can
|
@@ -520,7 +560,7 @@ module IBMWatson
|
|
520
560
|
# Chinese is 0.6 seconds.
|
521
561
|
#
|
522
562
|
# See [End of phrase silence
|
523
|
-
# time](https://cloud.ibm.com/docs/
|
563
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
524
564
|
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
525
565
|
# based on semantic features of the input, for example, at the conclusion of
|
526
566
|
# meaningful phrases such as sentences. The service bases its understanding of
|
@@ -530,7 +570,33 @@ module IBMWatson
|
|
530
570
|
# interval.
|
531
571
|
#
|
532
572
|
# See [Split transcript at phrase
|
533
|
-
# end](https://cloud.ibm.com/docs/
|
573
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
574
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
575
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
576
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
577
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
578
|
+
# activity.
|
579
|
+
#
|
580
|
+
# Specify a value between 0.0 and 1.0:
|
581
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
582
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
583
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
584
|
+
#
|
585
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
586
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
587
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
588
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
589
|
+
# conversations or background noise.
|
590
|
+
#
|
591
|
+
# Specify a value in the range of 0.0 to 1.0:
|
592
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
593
|
+
# disabled).
|
594
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
595
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
596
|
+
#
|
597
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
598
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
599
|
+
# @return [WebSocketClient] Returns a new WebSocketClient object
|
534
600
|
def recognize_using_websocket(
|
535
601
|
content_type: nil,
|
536
602
|
recognize_callback:,
|
@@ -559,7 +625,9 @@ module IBMWatson
|
|
559
625
|
processing_metrics_interval: nil,
|
560
626
|
audio_metrics: nil,
|
561
627
|
end_of_phrase_silence_time: nil,
|
562
|
-
split_transcript_at_phrase_end: nil
|
628
|
+
split_transcript_at_phrase_end: nil,
|
629
|
+
speech_detector_sensitivity: nil,
|
630
|
+
background_audio_suppression: nil
|
563
631
|
)
|
564
632
|
raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
|
565
633
|
raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
|
@@ -568,6 +636,7 @@ module IBMWatson
|
|
568
636
|
require_relative("./websocket/speech_to_text_websocket_listener.rb")
|
569
637
|
headers = {}
|
570
638
|
headers = conn.default_options.headers.to_hash unless conn.default_options.headers.to_hash.empty?
|
639
|
+
@authenticator.authenticate(headers)
|
571
640
|
service_url = @service_url.gsub("https:", "wss:")
|
572
641
|
params = {
|
573
642
|
"model" => model,
|
@@ -598,7 +667,9 @@ module IBMWatson
|
|
598
667
|
"processing_metrics_interval" => processing_metrics_interval,
|
599
668
|
"audio_metrics" => audio_metrics,
|
600
669
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
601
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
670
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
671
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
672
|
+
"background_audio_suppression" => background_audio_suppression
|
602
673
|
}
|
603
674
|
options.delete_if { |_, v| v.nil? }
|
604
675
|
WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
|
@@ -611,9 +682,9 @@ module IBMWatson
|
|
611
682
|
# @!method register_callback(callback_url:, user_secret: nil)
|
612
683
|
# Register a callback.
|
613
684
|
# Registers a callback URL with the service for use with subsequent asynchronous
|
614
|
-
# recognition requests. The service attempts to register, or
|
615
|
-
#
|
616
|
-
#
|
685
|
+
# recognition requests. The service attempts to register, or allowlist, the callback
|
686
|
+
# URL if it is not already registered by sending a `GET` request to the callback
|
687
|
+
# URL. The service passes a random alphanumeric challenge string via the
|
617
688
|
# `challenge_string` parameter of the request. The request includes an `Accept`
|
618
689
|
# header that specifies `text/plain` as the required response type.
|
619
690
|
#
|
@@ -625,9 +696,9 @@ module IBMWatson
|
|
625
696
|
#
|
626
697
|
# The service sends only a single `GET` request to the callback URL. If the service
|
627
698
|
# does not receive a reply with a response code of 200 and a body that echoes the
|
628
|
-
# challenge string sent by the service within five seconds, it does not
|
699
|
+
# challenge string sent by the service within five seconds, it does not allowlist
|
629
700
|
# the URL; it instead sends status code 400 in response to the **Register a
|
630
|
-
# callback** request. If the requested callback URL is already
|
701
|
+
# callback** request. If the requested callback URL is already allowlisted, the
|
631
702
|
# service responds to the initial registration request with response code 200.
|
632
703
|
#
|
633
704
|
# If you specify a user secret with the request, the service uses it as a key to
|
@@ -645,7 +716,7 @@ module IBMWatson
|
|
645
716
|
# **See also:** [Registering a callback
|
646
717
|
# URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#register).
|
647
718
|
# @param callback_url [String] An HTTP or HTTPS URL to which callback notifications are to be sent. To be
|
648
|
-
#
|
719
|
+
# allowlisted, the URL must successfully echo the challenge string during URL
|
649
720
|
# verification. During verification, the client can also check the signature that
|
650
721
|
# the service sends in the `X-Callback-Signature` header to verify the origin of the
|
651
722
|
# request.
|
@@ -683,7 +754,7 @@ module IBMWatson
|
|
683
754
|
##
|
684
755
|
# @!method unregister_callback(callback_url:)
|
685
756
|
# Unregister a callback.
|
686
|
-
# Unregisters a callback URL that was previously
|
757
|
+
# Unregisters a callback URL that was previously allowlisted with a **Register a
|
687
758
|
# callback** request for use with the asynchronous interface. Once unregistered, the
|
688
759
|
# URL can no longer be used with asynchronous recognition requests.
|
689
760
|
#
|
@@ -716,7 +787,7 @@ module IBMWatson
|
|
716
787
|
end
|
717
788
|
|
718
789
|
##
|
719
|
-
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
790
|
+
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
720
791
|
# Create a job.
|
721
792
|
# Creates a job for a new asynchronous recognition request. The job is owned by the
|
722
793
|
# instance of the service whose credentials are used to create it. How you learn the
|
@@ -814,14 +885,14 @@ module IBMWatson
|
|
814
885
|
#
|
815
886
|
# **See also:** [Audio
|
816
887
|
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
817
|
-
# @param audio [
|
888
|
+
# @param audio [File] The audio to transcribe.
|
818
889
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
819
890
|
# audio format, see **Audio formats (content types)** in the method description.
|
820
891
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
821
892
|
# [Languages and
|
822
893
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
823
894
|
# @param callback_url [String] A URL to which callback notifications are to be sent. The URL must already be
|
824
|
-
# successfully
|
895
|
+
# successfully allowlisted by using the **Register a callback** method. You can
|
825
896
|
# include the same callback URL with any number of job creation requests. Omit the
|
826
897
|
# parameter to poll the service for job completion and results.
|
827
898
|
#
|
@@ -902,8 +973,14 @@ module IBMWatson
|
|
902
973
|
# @param keywords [Array<String>] An array of keyword strings to spot in the audio. Each keyword string can include
|
903
974
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
904
975
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
905
|
-
# threshold.
|
906
|
-
#
|
976
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
977
|
+
# keywords.
|
978
|
+
#
|
979
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
980
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
981
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
982
|
+
#
|
983
|
+
# See [Keyword
|
907
984
|
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
908
985
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
909
986
|
# considered to match a keyword if its confidence is greater than or equal to the
|
@@ -948,11 +1025,9 @@ module IBMWatson
|
|
948
1025
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
949
1026
|
# parameter.
|
950
1027
|
#
|
951
|
-
# **Note:** Applies to US English,
|
952
|
-
# narrowband models) and UK English (narrowband model)
|
953
|
-
#
|
954
|
-
# **Get a model** method and check that the attribute `speaker_labels` is set to
|
955
|
-
# `true`.
|
1028
|
+
# **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
|
1029
|
+
# Spanish (both broadband and narrowband models) and UK English (narrowband model)
|
1030
|
+
# transcription only.
|
956
1031
|
#
|
957
1032
|
# See [Speaker
|
958
1033
|
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
@@ -1035,8 +1110,33 @@ module IBMWatson
|
|
1035
1110
|
#
|
1036
1111
|
# See [Split transcript at phrase
|
1037
1112
|
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
1113
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
1114
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
1115
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
1116
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
1117
|
+
# activity.
|
1118
|
+
#
|
1119
|
+
# Specify a value between 0.0 and 1.0:
|
1120
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
1121
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
1122
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
1123
|
+
#
|
1124
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1125
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
1126
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
1127
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
1128
|
+
# conversations or background noise.
|
1129
|
+
#
|
1130
|
+
# Specify a value in the range of 0.0 to 1.0:
|
1131
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
1132
|
+
# disabled).
|
1133
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
1134
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
1135
|
+
#
|
1136
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1137
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
1038
1138
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1039
|
-
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
1139
|
+
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
1040
1140
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
1041
1141
|
|
1042
1142
|
headers = {
|
@@ -1073,7 +1173,9 @@ module IBMWatson
|
|
1073
1173
|
"processing_metrics_interval" => processing_metrics_interval,
|
1074
1174
|
"audio_metrics" => audio_metrics,
|
1075
1175
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
1076
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
1176
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
1177
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
1178
|
+
"background_audio_suppression" => background_audio_suppression
|
1077
1179
|
}
|
1078
1180
|
|
1079
1181
|
data = audio
|
@@ -1290,8 +1392,12 @@ module IBMWatson
|
|
1290
1392
|
# **See also:** [Listing custom language
|
1291
1393
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
|
1292
1394
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
1293
|
-
# are to be returned
|
1294
|
-
#
|
1395
|
+
# are to be returned. Omit the parameter to see all custom language or custom
|
1396
|
+
# acoustic models that are owned by the requesting credentials.
|
1397
|
+
#
|
1398
|
+
# To determine the languages for which customization is available, see [Language
|
1399
|
+
# support for
|
1400
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
1295
1401
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1296
1402
|
def list_language_models(language: nil)
|
1297
1403
|
headers = {
|
@@ -1599,18 +1705,20 @@ module IBMWatson
|
|
1599
1705
|
#
|
1600
1706
|
# The call returns an HTTP 201 response code if the corpus is valid. The service
|
1601
1707
|
# then asynchronously processes the contents of the corpus and automatically
|
1602
|
-
# extracts new words that it finds. This can take on the order of
|
1603
|
-
# complete depending on the total number of words and the number of new words in
|
1604
|
-
# corpus, as well as the current load on the service. You cannot submit requests
|
1605
|
-
# add additional resources to the custom model or to train the model until the
|
1708
|
+
# extracts new words that it finds. This operation can take on the order of minutes
|
1709
|
+
# to complete depending on the total number of words and the number of new words in
|
1710
|
+
# the corpus, as well as the current load on the service. You cannot submit requests
|
1711
|
+
# to add additional resources to the custom model or to train the model until the
|
1606
1712
|
# service's analysis of the corpus for the current request completes. Use the **List
|
1607
1713
|
# a corpus** method to check the status of the analysis.
|
1608
1714
|
#
|
1609
1715
|
# The service auto-populates the model's words resource with words from the corpus
|
1610
|
-
# that are not found in its base vocabulary. These are referred to as
|
1611
|
-
# out-of-vocabulary (OOV) words.
|
1612
|
-
#
|
1613
|
-
#
|
1716
|
+
# that are not found in its base vocabulary. These words are referred to as
|
1717
|
+
# out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
|
1718
|
+
# resource to ensure that each OOV word's definition is complete and valid. You can
|
1719
|
+
# use the **List custom words** method to examine the words resource. You can use
|
1720
|
+
# other words methods to eliminate typos and modify how words are pronounced as
|
1721
|
+
# needed.
|
1614
1722
|
#
|
1615
1723
|
# To add a corpus file that has the same name as an existing corpus, set the
|
1616
1724
|
# `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
|
@@ -1627,10 +1735,12 @@ module IBMWatson
|
|
1627
1735
|
# directly.
|
1628
1736
|
#
|
1629
1737
|
# **See also:**
|
1738
|
+
# * [Add a corpus to the custom language
|
1739
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
|
1630
1740
|
# * [Working with
|
1631
1741
|
# corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
|
1632
|
-
# * [
|
1633
|
-
#
|
1742
|
+
# * [Validating a words
|
1743
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1634
1744
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1635
1745
|
# the request. You must make the request with credentials for the instance of the
|
1636
1746
|
# service that owns the custom model.
|
@@ -1859,7 +1969,10 @@ module IBMWatson
|
|
1859
1969
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1860
1970
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1861
1971
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1862
|
-
# word.
|
1972
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
1973
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
1974
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
1975
|
+
# valid.
|
1863
1976
|
# * The `display_as` field provides a different way of spelling the word in a
|
1864
1977
|
# transcript. Use the parameter when you want the word to appear different from its
|
1865
1978
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1889,10 +2002,12 @@ module IBMWatson
|
|
1889
2002
|
#
|
1890
2003
|
#
|
1891
2004
|
# **See also:**
|
2005
|
+
# * [Add words to the custom language
|
2006
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
1892
2007
|
# * [Working with custom
|
1893
2008
|
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1894
|
-
# * [
|
1895
|
-
#
|
2009
|
+
# * [Validating a words
|
2010
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1896
2011
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1897
2012
|
# the request. You must make the request with credentials for the instance of the
|
1898
2013
|
# service that owns the custom model.
|
@@ -1948,7 +2063,10 @@ module IBMWatson
|
|
1948
2063
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1949
2064
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1950
2065
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1951
|
-
# word.
|
2066
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
2067
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
2068
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
2069
|
+
# valid.
|
1952
2070
|
# * The `display_as` field provides a different way of spelling the word in a
|
1953
2071
|
# transcript. Use the parameter when you want the word to appear different from its
|
1954
2072
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1960,10 +2078,12 @@ module IBMWatson
|
|
1960
2078
|
# the **List a custom word** method to review the word that you add.
|
1961
2079
|
#
|
1962
2080
|
# **See also:**
|
2081
|
+
# * [Add words to the custom language
|
2082
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
1963
2083
|
# * [Working with custom
|
1964
2084
|
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1965
|
-
# * [
|
1966
|
-
#
|
2085
|
+
# * [Validating a words
|
2086
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1967
2087
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1968
2088
|
# the request. You must make the request with credentials for the instance of the
|
1969
2089
|
# service that owns the custom model.
|
@@ -2147,12 +2267,12 @@ module IBMWatson
|
|
2147
2267
|
#
|
2148
2268
|
# The call returns an HTTP 201 response code if the grammar is valid. The service
|
2149
2269
|
# then asynchronously processes the contents of the grammar and automatically
|
2150
|
-
# extracts new words that it finds. This can take a few seconds
|
2151
|
-
# depending on the size and complexity of the grammar, as well as the
|
2152
|
-
# on the service. You cannot submit requests to add additional
|
2153
|
-
# custom model or to train the model until the service's analysis
|
2154
|
-
# the current request completes. Use the **Get a grammar** method
|
2155
|
-
# status of the analysis.
|
2270
|
+
# extracts new words that it finds. This operation can take a few seconds or minutes
|
2271
|
+
# to complete depending on the size and complexity of the grammar, as well as the
|
2272
|
+
# current load on the service. You cannot submit requests to add additional
|
2273
|
+
# resources to the custom model or to train the model until the service's analysis
|
2274
|
+
# of the grammar for the current request completes. Use the **Get a grammar** method
|
2275
|
+
# to check the status of the analysis.
|
2156
2276
|
#
|
2157
2277
|
# The service populates the model's words resource with any word that is recognized
|
2158
2278
|
# by the grammar that is not found in the model's base vocabulary. These are
|
@@ -2396,8 +2516,12 @@ module IBMWatson
|
|
2396
2516
|
# **See also:** [Listing custom acoustic
|
2397
2517
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
|
2398
2518
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
2399
|
-
# are to be returned
|
2400
|
-
#
|
2519
|
+
# are to be returned. Omit the parameter to see all custom language or custom
|
2520
|
+
# acoustic models that are owned by the requesting credentials.
|
2521
|
+
#
|
2522
|
+
# To determine the languages for which customization is available, see [Language
|
2523
|
+
# support for
|
2524
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
2401
2525
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2402
2526
|
def list_acoustic_models(language: nil)
|
2403
2527
|
headers = {
|
@@ -2495,14 +2619,14 @@ module IBMWatson
|
|
2495
2619
|
# it. You must use credentials for the instance of the service that owns a model to
|
2496
2620
|
# train it.
|
2497
2621
|
#
|
2498
|
-
# The training method is asynchronous.
|
2499
|
-
#
|
2500
|
-
#
|
2501
|
-
# training a custom acoustic model takes
|
2502
|
-
#
|
2503
|
-
#
|
2504
|
-
# returns an HTTP 200 response code to indicate that the training
|
2505
|
-
#
|
2622
|
+
# The training method is asynchronous. Training time depends on the cumulative
|
2623
|
+
# amount of audio data that the custom acoustic model contains and the current load
|
2624
|
+
# on the service. When you train or retrain a model, the service uses all of the
|
2625
|
+
# model's audio data in the training. Training a custom acoustic model takes
|
2626
|
+
# approximately as long as the length of its cumulative audio data. For example, it
|
2627
|
+
# takes approximately 2 hours to train a model that contains a total of 2 hours of
|
2628
|
+
# audio. The method returns an HTTP 200 response code to indicate that the training
|
2629
|
+
# process has begun.
|
2506
2630
|
#
|
2507
2631
|
# You can monitor the status of the training by using the **Get a custom acoustic
|
2508
2632
|
# model** method to poll the model's status. Use a loop to check the status once a
|
@@ -2518,8 +2642,9 @@ module IBMWatson
|
|
2518
2642
|
# Train with a custom language model if you have verbatim transcriptions of the
|
2519
2643
|
# audio files that you have added to the custom model or you have either corpora
|
2520
2644
|
# (text files) or a list of words that are relevant to the contents of the audio
|
2521
|
-
# files.
|
2522
|
-
# base model
|
2645
|
+
# files. For training to succeed, both of the custom models must be based on the
|
2646
|
+
# same version of the same base model, and the custom language model must be fully
|
2647
|
+
# trained and available.
|
2523
2648
|
#
|
2524
2649
|
# **See also:**
|
2525
2650
|
# * [Train the custom acoustic
|
@@ -2535,6 +2660,9 @@ module IBMWatson
|
|
2535
2660
|
# another training request or a request to add audio resources to the model.
|
2536
2661
|
# * The custom model contains less than 10 minutes or more than 200 hours of audio
|
2537
2662
|
# data.
|
2663
|
+
# * You passed a custom language model with the `custom_language_model_id` query
|
2664
|
+
# parameter that is not in the available state. A custom language model must be
|
2665
|
+
# fully trained and available to be used to train a custom acoustic model.
|
2538
2666
|
# * You passed an incompatible custom language model with the
|
2539
2667
|
# `custom_language_model_id` query parameter. Both custom models must be based on
|
2540
2668
|
# the same version of the same base model.
|
@@ -2550,8 +2678,8 @@ module IBMWatson
|
|
2550
2678
|
# been trained with verbatim transcriptions of the audio resources or that contains
|
2551
2679
|
# words that are relevant to the contents of the audio resources. The custom
|
2552
2680
|
# language model must be based on the same version of the same base model as the
|
2553
|
-
# custom acoustic model. The credentials specified with the request must own both
|
2554
|
-
# custom models.
|
2681
|
+
# custom acoustic model, and the custom language model must be fully trained and
|
2682
|
+
# available. The credentials specified with the request must own both custom models.
|
2555
2683
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2556
2684
|
def train_acoustic_model(customization_id:, custom_language_model_id: nil)
|
2557
2685
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2649,8 +2777,9 @@ module IBMWatson
|
|
2649
2777
|
# service that owns the custom model.
|
2650
2778
|
# @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
|
2651
2779
|
# customization ID (GUID) of that custom language model. The custom language model
|
2652
|
-
# must be upgraded before the custom acoustic model can be upgraded. The credentials
|
2653
|
-
# specified with the request must own both custom models.
|
2780
|
+
# must be upgraded before the custom acoustic model can be upgraded. The custom
|
2781
|
+
# language model must be fully trained and available. The credentials specified with
|
2782
|
+
# the request must own both custom models.
|
2654
2783
|
# @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
|
2655
2784
|
# has been modified since it was last trained. Use this parameter only to force the
|
2656
2785
|
# upgrade of a custom acoustic model that is trained with a custom language model,
|
@@ -2745,14 +2874,14 @@ module IBMWatson
|
|
2745
2874
|
# same name as an existing audio resource, set the `allow_overwrite` parameter to
|
2746
2875
|
# `true`; otherwise, the request fails.
|
2747
2876
|
#
|
2748
|
-
# The method is asynchronous. It can take several seconds to complete depending on
|
2749
|
-
# the duration of the audio and, in the case of an archive file, the total number of
|
2750
|
-
# audio files being processed. The service returns a 201 response code if the audio
|
2751
|
-
# is valid. It then asynchronously analyzes the contents of the audio file or files
|
2752
|
-
# and automatically extracts information about the audio such as its length,
|
2753
|
-
# sampling rate, and encoding. You cannot submit requests to train or upgrade the
|
2754
|
-
# model until the service's analysis of all audio resources for current requests
|
2755
|
-
# completes.
|
2877
|
+
# The method is asynchronous. It can take several seconds or minutes to complete
|
2878
|
+
# depending on the duration of the audio and, in the case of an archive file, the
|
2879
|
+
# total number of audio files being processed. The service returns a 201 response
|
2880
|
+
# code if the audio is valid. It then asynchronously analyzes the contents of the
|
2881
|
+
# audio file or files and automatically extracts information about the audio such as
|
2882
|
+
# its length, sampling rate, and encoding. You cannot submit requests to train or
|
2883
|
+
# upgrade the model until the service's analysis of all audio resources for current
|
2884
|
+
# requests completes.
|
2756
2885
|
#
|
2757
2886
|
# To determine the status of the service's analysis of the audio, use the **Get an
|
2758
2887
|
# audio resource** method to poll the status of the audio. The method accepts the
|
@@ -2841,7 +2970,7 @@ module IBMWatson
|
|
2841
2970
|
# used, their use is strongly discouraged.)
|
2842
2971
|
# * Do not use the name of an audio resource that has already been added to the
|
2843
2972
|
# custom model.
|
2844
|
-
# @param audio_resource [String] The audio resource that is to be added to the custom acoustic model, an individual
|
2973
|
+
# @param audio_resource [File] The audio resource that is to be added to the custom acoustic model, an individual
|
2845
2974
|
# audio file or an archive file.
|
2846
2975
|
#
|
2847
2976
|
# With the `curl` command, use the `--data-binary` option to upload the file for the
|
@@ -3009,10 +3138,15 @@ module IBMWatson
|
|
3009
3138
|
# deletes all data for the customer ID, regardless of the method by which the
|
3010
3139
|
# information was added. The method has no effect if no data is associated with the
|
3011
3140
|
# customer ID. You must issue the request with credentials for the same instance of
|
3012
|
-
# the service that was used to associate the customer ID with the data.
|
3013
|
-
#
|
3014
|
-
#
|
3015
|
-
#
|
3141
|
+
# the service that was used to associate the customer ID with the data. You
|
3142
|
+
# associate a customer ID with data by passing the `X-Watson-Metadata` header with a
|
3143
|
+
# request that passes the data.
|
3144
|
+
#
|
3145
|
+
# **Note:** If you delete an instance of the service from the service console, all
|
3146
|
+
# data associated with that service instance is automatically deleted. This includes
|
3147
|
+
# all custom language models, corpora, grammars, and words; all custom acoustic
|
3148
|
+
# models and audio resources; all registered endpoints for the asynchronous HTTP
|
3149
|
+
# interface; and all data related to speech recognition requests.
|
3016
3150
|
#
|
3017
3151
|
# **See also:** [Information
|
3018
3152
|
# security](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-information-security#information-security).
|