ibm_watson 1.3.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +36 -5
- data/lib/ibm_watson/assistant_v1.rb +225 -199
- data/lib/ibm_watson/assistant_v2.rb +228 -21
- data/lib/ibm_watson/compare_comply_v1.rb +43 -24
- data/lib/ibm_watson/discovery_v1.rb +144 -19
- data/lib/ibm_watson/discovery_v2.rb +742 -23
- data/lib/ibm_watson/language_translator_v3.rb +216 -64
- data/lib/ibm_watson/natural_language_classifier_v1.rb +11 -3
- data/lib/ibm_watson/natural_language_understanding_v1.rb +32 -26
- data/lib/ibm_watson/personality_insights_v3.rb +34 -19
- data/lib/ibm_watson/speech_to_text_v1.rb +239 -106
- data/lib/ibm_watson/text_to_speech_v1.rb +139 -146
- data/lib/ibm_watson/tone_analyzer_v3.rb +19 -14
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +40 -17
- data/lib/ibm_watson/visual_recognition_v4.rb +110 -18
- data/test/integration/test_assistant_v1.rb +9 -0
- data/test/integration/test_assistant_v2.rb +34 -0
- data/test/integration/test_compare_comply_v1.rb +1 -12
- data/test/integration/test_discovery_v2.rb +132 -6
- data/test/integration/test_language_translator_v3.rb +5 -0
- data/test/integration/test_text_to_speech_v1.rb +3 -3
- data/test/integration/test_visual_recognition_v4.rb +9 -0
- data/test/unit/test_assistant_v1.rb +149 -98
- data/test/unit/test_assistant_v2.rb +153 -8
- data/test/unit/test_compare_comply_v1.rb +20 -20
- data/test/unit/test_discovery_v1.rb +125 -125
- data/test/unit/test_discovery_v2.rb +262 -29
- data/test/unit/test_language_translator_v3.rb +85 -24
- data/test/unit/test_natural_language_classifier_v1.rb +17 -17
- data/test/unit/test_natural_language_understanding_v1.rb +10 -10
- data/test/unit/test_personality_insights_v3.rb +14 -14
- data/test/unit/test_speech_to_text_v1.rb +97 -97
- data/test/unit/test_text_to_speech_v1.rb +48 -48
- data/test/unit/test_tone_analyzer_v3.rb +12 -12
- data/test/unit/test_visual_recognition_v3.rb +16 -16
- data/test/unit/test_visual_recognition_v4.rb +56 -38
- metadata +3 -3
@@ -13,14 +13,16 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.19.0-be3b4618-20201113-200858
|
18
|
+
#
|
17
19
|
# Analyze various features of text content at scale. Provide text, raw HTML, or a public
|
18
20
|
# URL and IBM Watson Natural Language Understanding will give you results for the features
|
19
21
|
# you request. The service cleans HTML content before analysis by default, so the results
|
20
22
|
# can ignore most advertisements and other unwanted content.
|
21
23
|
#
|
22
24
|
# You can create [custom
|
23
|
-
# models](https://cloud.ibm.com/docs/
|
25
|
+
# models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
|
24
26
|
# with Watson Knowledge Studio to detect custom entities and relations in Natural Language
|
25
27
|
# Understanding.
|
26
28
|
|
@@ -36,37 +38,36 @@ module IBMWatson
|
|
36
38
|
# The Natural Language Understanding V1 service.
|
37
39
|
class NaturalLanguageUnderstandingV1 < IBMCloudSdkCore::BaseService
|
38
40
|
include Concurrent::Async
|
41
|
+
DEFAULT_SERVICE_NAME = "natural_language_understanding"
|
42
|
+
DEFAULT_SERVICE_URL = "https://api.us-south.natural-language-understanding.watson.cloud.ibm.com"
|
43
|
+
attr_accessor :version
|
39
44
|
##
|
40
45
|
# @!method initialize(args)
|
41
46
|
# Construct a new client for the Natural Language Understanding service.
|
42
47
|
#
|
43
48
|
# @param args [Hash] The args to initialize with
|
44
|
-
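# @option args version [String] The API version date to use with the service, in
|
45
|
-
# "YYYY-MM-DD" format. Whenever the API is changed in a backwards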
|
46
|
-
# incompatible way, a new minor version of the API is released.
|
47
|
-
# The service uses the API version for the date you specify, or
|
48
|
-
# the most recent version before that date. Note that you should
|
49
|
-
# not programmatically specify the current date at runtime, in
|
50
|
-
# case the API has been updated since your application's release.
|
51
|
-
# Instead, specify a version date that is compatible with your
|
52
|
-
# application, and don't change it until your application is
|
53
|
-
# ready for a later version.
|
49
|
+
# @option args version [String] Release date of the API version you want to use. Specify dates in YYYY-MM-DD
|
50
|
+
# format. The current version is `2020-08-01`.
|
54
51
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
55
52
|
# The base service_url may differ between IBM Cloud regions.
|
56
53
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
54
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
55
|
+
# any external configuration, if applicable.
|
57
56
|
def initialize(args = {})
|
58
57
|
@__async_initialized__ = false
|
59
58
|
defaults = {}
|
60
|
-
defaults[:
|
61
|
-
defaults[:
|
59
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
60
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
62
61
|
defaults[:authenticator] = nil
|
62
|
+
defaults[:version] = nil
|
63
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
63
64
|
args = defaults.merge(args)
|
64
65
|
@version = args[:version]
|
65
66
|
raise ArgumentError.new("version must be provided") if @version.nil?
|
66
67
|
|
67
|
-
args[:service_name] = "natural_language_understanding"
|
68
68
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
69
69
|
super
|
70
|
+
@service_url = user_service_url unless user_service_url.nil?
|
70
71
|
end
|
71
72
|
|
72
73
|
#########################
|
@@ -86,11 +87,12 @@ module IBMWatson
|
|
86
87
|
# - Relations
|
87
88
|
# - Semantic roles
|
88
89
|
# - Sentiment
|
89
|
-
# - Syntax
|
90
|
+
# - Syntax
|
91
|
+
# - Summarization (Experimental)
|
90
92
|
#
|
91
93
|
# If a language for the input text is not specified with the `language` parameter,
|
92
94
|
# the service [automatically detects the
|
93
|
-
# language](https://cloud.ibm.com/docs/
|
95
|
+
# language](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-detectable-languages).
|
94
96
|
# @param features [Features] Specific features to analyze the document for.
|
95
97
|
# @param text [String] The plain text to analyze. One of the `text`, `html`, or `url` parameters is
|
96
98
|
# required.
|
@@ -98,12 +100,11 @@ module IBMWatson
|
|
98
100
|
# required.
|
99
101
|
# @param url [String] The webpage to analyze. One of the `text`, `html`, or `url` parameters is
|
100
102
|
# required.
|
101
|
-
# @param clean [Boolean] Set this to `false` to disable webpage cleaning.
|
102
|
-
# cleaning, see
|
103
|
-
# webpages](https://cloud.ibm.com/docs/
|
104
|
-
# documentation.
|
103
|
+
# @param clean [Boolean] Set this to `false` to disable webpage cleaning. For more information about
|
104
|
+
# webpage cleaning, see [Analyzing
|
105
|
+
# webpages](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages).
|
105
106
|
# @param xpath [String] An [XPath
|
106
|
-
# query](https://cloud.ibm.com/docs/
|
107
|
+
# query](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages#xpath)
|
107
108
|
# to perform on `html` or `url` input. Results of the query will be appended to the
|
108
109
|
# cleaned webpage text before it is analyzed. To analyze only the results of the
|
109
110
|
# XPath query, set the `clean` parameter to `false`.
|
@@ -111,12 +112,13 @@ module IBMWatson
|
|
111
112
|
# @param return_analyzed_text [Boolean] Whether or not to return the analyzed text.
|
112
113
|
# @param language [String] ISO 639-1 code that specifies the language of your text. This overrides automatic
|
113
114
|
# language detection. Language support differs depending on the features you include
|
114
|
-
# in your analysis.
|
115
|
-
# support](https://cloud.ibm.com/docs/
|
116
|
-
# for more information.
|
115
|
+
# in your analysis. For more information, see [Language
|
116
|
+
# support](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-language-support).
|
117
117
|
# @param limit_text_characters [Integer] Sets the maximum number of characters that are processed by the service.
|
118
118
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
119
119
|
def analyze(features:, text: nil, html: nil, url: nil, clean: nil, xpath: nil, fallback_to_raw: nil, return_analyzed_text: nil, language: nil, limit_text_characters: nil)
|
120
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
121
|
+
|
120
122
|
raise ArgumentError.new("features must be provided") if features.nil?
|
121
123
|
|
122
124
|
headers = {
|
@@ -161,10 +163,12 @@ module IBMWatson
|
|
161
163
|
# @!method list_models
|
162
164
|
# List models.
|
163
165
|
# Lists Watson Knowledge Studio [custom entities and relations
|
164
|
-
# models](https://cloud.ibm.com/docs/
|
166
|
+
# models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
|
165
167
|
# that are deployed to your Natural Language Understanding service.
|
166
168
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
167
169
|
def list_models
|
170
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
171
|
+
|
168
172
|
headers = {
|
169
173
|
}
|
170
174
|
sdk_headers = Common.new.get_sdk_headers("natural-language-understanding", "V1", "list_models")
|
@@ -193,6 +197,8 @@ module IBMWatson
|
|
193
197
|
# @param model_id [String] Model ID of the model to delete.
|
194
198
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
195
199
|
def delete_model(model_id:)
|
200
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
201
|
+
|
196
202
|
raise ArgumentError.new("model_id must be provided") if model_id.nil?
|
197
203
|
|
198
204
|
headers = {
|
@@ -13,12 +13,25 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.19.0-be3b4618-20201113-200858
|
18
|
+
#
|
19
|
+
# IBM® will begin sunsetting IBM Watson™ Personality Insights on 1 December
|
20
|
+
# 2020. For a period of one year from this date, you will still be able to use Watson
|
21
|
+
# Personality Insights. However, as of 1 December 2021, the offering will no longer be
|
22
|
+
# available.<br/><br/>As an alternative, we encourage you to consider migrating to IBM
|
23
|
+
# Watson™ Natural Language Understanding, a service on IBM Cloud® that uses deep
|
24
|
+
# learning to extract data and insights from text such as keywords, categories, sentiment,
|
25
|
+
# emotion, and syntax to provide insights for your business or industry. For more
|
26
|
+
# information, see [About Natural Language
|
27
|
+
# Understanding](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-about).
|
28
|
+
# {: deprecated}
|
29
|
+
#
|
30
|
+
# The IBM Watson Personality Insights service enables applications to derive insights from
|
31
|
+
# social media, enterprise data, or other digital communications. The service uses
|
32
|
+
# linguistic analytics to infer individuals' intrinsic personality characteristics,
|
33
|
+
# including Big Five, Needs, and Values, from digital communications such as email, text
|
34
|
+
# messages, tweets, and forum posts.
|
22
35
|
#
|
23
36
|
# The service can automatically infer, from potentially noisy social media, portraits of
|
24
37
|
# individuals that reflect their personality characteristics. The service can infer
|
@@ -47,37 +60,37 @@ module IBMWatson
|
|
47
60
|
# The Personality Insights V3 service.
|
48
61
|
class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
|
49
62
|
include Concurrent::Async
|
63
|
+
DEFAULT_SERVICE_NAME = "personality_insights"
|
64
|
+
DEFAULT_SERVICE_URL = "https://api.us-south.personality-insights.watson.cloud.ibm.com"
|
65
|
+
attr_accessor :version
|
50
66
|
##
|
51
67
|
# @!method initialize(args)
|
52
68
|
# Construct a new client for the Personality Insights service.
|
53
69
|
#
|
54
70
|
# @param args [Hash] The args to initialize with
|
55
|
-
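# @option args version [String] The API version date to use with the service, in
|
56
|
-
# "YYYY-MM-DD" format. Whenever the API is changed in a backwards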
|
57
|
-
# incompatible way, a new minor version of the API is released.
|
58
|
-
# The service uses the API version for the date you specify, or
|
59
|
-
# the most recent version before that date. Note that you should
|
60
|
-
# not programmatically specify the current date at runtime, in
|
61
|
-
# case the API has been updated since your application's release.
|
62
|
-
# Instead, specify a version date that is compatible with your
|
63
|
-
# application, and don't change it until your application is
|
64
|
-
# ready for a later version.
|
71
|
+
# @option args version [String] Release date of the version of the API you want to use. Specify dates in
|
72
|
+
# YYYY-MM-DD format. The current version is `2017-10-13`.
|
65
73
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
66
74
|
# The base service_url may differ between IBM Cloud regions.
|
67
75
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
76
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
77
|
+
# any external configuration, if applicable.
|
68
78
|
def initialize(args = {})
|
79
|
+
warn "On 1 December 2021, Personality Insights will no longer be available. For more information, see https://github.com/watson-developer-cloud/ruby-sdk/tree/master#personality-insights-deprecation."
|
69
80
|
@__async_initialized__ = false
|
70
81
|
defaults = {}
|
71
|
-
defaults[:
|
72
|
-
defaults[:
|
82
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
83
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
73
84
|
defaults[:authenticator] = nil
|
85
|
+
defaults[:version] = nil
|
86
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
74
87
|
args = defaults.merge(args)
|
75
88
|
@version = args[:version]
|
76
89
|
raise ArgumentError.new("version must be provided") if @version.nil?
|
77
90
|
|
78
|
-
args[:service_name] = "personality_insights"
|
79
91
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
80
92
|
super
|
93
|
+
@service_url = user_service_url unless user_service_url.nil?
|
81
94
|
end
|
82
95
|
|
83
96
|
#########################
|
@@ -164,6 +177,8 @@ module IBMWatson
|
|
164
177
|
# default, no consumption preferences are returned.
|
165
178
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
166
179
|
def profile(content:, accept:, content_type: nil, content_language: nil, accept_language: nil, raw_scores: nil, csv_headers: nil, consumption_preferences: nil)
|
180
|
+
raise ArgumentError.new("version must be provided") if version.nil?
|
181
|
+
|
167
182
|
raise ArgumentError.new("content must be provided") if content.nil?
|
168
183
|
|
169
184
|
raise ArgumentError.new("accept must be provided") if accept.nil?
|
@@ -13,13 +13,15 @@
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
|
-
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
16
|
+
#
|
17
|
+
# IBM OpenAPI SDK Code Generator Version: 3.19.0-be3b4618-20201113-200858
|
18
|
+
#
|
19
|
+
# The IBM Watson™ Speech to Text service provides APIs that use IBM's
|
20
|
+
# speech-recognition capabilities to produce transcripts of spoken audio. The service can
|
21
|
+
# transcribe speech from various languages and audio formats. In addition to basic
|
22
|
+
# transcription, the service can produce detailed information about many different aspects
|
23
|
+
# of the audio. For most languages, the service supports two sampling rates, broadband and
|
24
|
+
# narrowband. It returns all JSON response content in the UTF-8 character set.
|
23
25
|
#
|
24
26
|
# For speech recognition, the service supports synchronous and asynchronous HTTP
|
25
27
|
# Representational State Transfer (REST) interfaces. It also supports a WebSocket
|
@@ -34,9 +36,9 @@
|
|
34
36
|
# is a formal language specification that lets you restrict the phrases that the service
|
35
37
|
# can recognize.
|
36
38
|
#
|
37
|
-
# Language model customization
|
38
|
-
#
|
39
|
-
#
|
39
|
+
# Language model customization and acoustic model customization are generally available
|
40
|
+
# for production use with all language models that are generally available. Grammars are
|
41
|
+
# beta functionality for all language models that support language model customization.
|
40
42
|
|
41
43
|
require "concurrent"
|
42
44
|
require "erb"
|
@@ -50,6 +52,8 @@ module IBMWatson
|
|
50
52
|
# The Speech to Text V1 service.
|
51
53
|
class SpeechToTextV1 < IBMCloudSdkCore::BaseService
|
52
54
|
include Concurrent::Async
|
55
|
+
DEFAULT_SERVICE_NAME = "speech_to_text"
|
56
|
+
DEFAULT_SERVICE_URL = "https://api.us-south.speech-to-text.watson.cloud.ibm.com"
|
53
57
|
##
|
54
58
|
# @!method initialize(args)
|
55
59
|
# Construct a new client for the Speech to Text service.
|
@@ -58,15 +62,19 @@ module IBMWatson
|
|
58
62
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
59
63
|
# The base service_url may differ between IBM Cloud regions.
|
60
64
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
65
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
66
|
+
# any external configuration, if applicable.
|
61
67
|
def initialize(args = {})
|
62
68
|
@__async_initialized__ = false
|
63
69
|
defaults = {}
|
64
|
-
defaults[:service_url] =
|
70
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
71
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
65
72
|
defaults[:authenticator] = nil
|
73
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
66
74
|
args = defaults.merge(args)
|
67
|
-
args[:service_name] = "speech_to_text"
|
68
75
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
69
76
|
super
|
77
|
+
@service_url = user_service_url unless user_service_url.nil?
|
70
78
|
end
|
71
79
|
|
72
80
|
#########################
|
@@ -78,7 +86,8 @@ module IBMWatson
|
|
78
86
|
# List models.
|
79
87
|
# Lists all language models that are available for use with the service. The
|
80
88
|
# information includes the name of the model and its minimum sampling rate in Hertz,
|
81
|
-
# among other things.
|
89
|
+
# among other things. The ordering of the list of models can change from call to
|
90
|
+
# call; do not rely on an alphabetized or static list of models.
|
82
91
|
#
|
83
92
|
# **See also:** [Languages and
|
84
93
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
@@ -135,7 +144,7 @@ module IBMWatson
|
|
135
144
|
#########################
|
136
145
|
|
137
146
|
##
|
138
|
-
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
147
|
+
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
139
148
|
# Recognize audio.
|
140
149
|
# Sends audio and returns transcription results for a recognition request. You can
|
141
150
|
# pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
|
@@ -223,7 +232,7 @@ module IBMWatson
|
|
223
232
|
#
|
224
233
|
# **See also:** [Making a multipart HTTP
|
225
234
|
# request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi).
|
226
|
-
# @param audio [
|
235
|
+
# @param audio [File] The audio to transcribe.
|
227
236
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
228
237
|
# audio format, see **Audio formats (content types)** in the method description.
|
229
238
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
@@ -277,8 +286,14 @@ module IBMWatson
|
|
277
286
|
# @param keywords [Array<String>] An array of keyword strings to spot in the audio. Each keyword string can include
|
278
287
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
279
288
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
280
|
-
# threshold.
|
281
|
-
#
|
289
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
290
|
+
# keywords.
|
291
|
+
#
|
292
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
293
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
294
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
295
|
+
#
|
296
|
+
# See [Keyword
|
282
297
|
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
283
298
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
284
299
|
# considered to match a keyword if its confidence is greater than or equal to the
|
@@ -323,11 +338,9 @@ module IBMWatson
|
|
323
338
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
324
339
|
# parameter.
|
325
340
|
#
|
326
|
-
# **Note:** Applies to US English,
|
327
|
-
# narrowband models) and UK English (narrowband model)
|
328
|
-
#
|
329
|
-
# **Get a model** method and check that the attribute `speaker_labels` is set to
|
330
|
-
# `true`.
|
341
|
+
# **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
|
342
|
+
# Spanish (both broadband and narrowband models) and UK English (narrowband model)
|
343
|
+
# transcription only.
|
331
344
|
#
|
332
345
|
# See [Speaker
|
333
346
|
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
@@ -388,8 +401,33 @@ module IBMWatson
|
|
388
401
|
#
|
389
402
|
# See [Split transcript at phrase
|
390
403
|
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
404
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
405
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
406
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
407
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
408
|
+
# activity.
|
409
|
+
#
|
410
|
+
# Specify a value between 0.0 and 1.0:
|
411
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
412
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
413
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
414
|
+
#
|
415
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
416
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
417
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
418
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
419
|
+
# conversations or background noise.
|
420
|
+
#
|
421
|
+
# Specify a value in the range of 0.0 to 1.0:
|
422
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
423
|
+
# disabled).
|
424
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
425
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
426
|
+
#
|
427
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
428
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
391
429
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
392
|
-
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
430
|
+
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
393
431
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
394
432
|
|
395
433
|
headers = {
|
@@ -420,7 +458,9 @@ module IBMWatson
|
|
420
458
|
"redaction" => redaction,
|
421
459
|
"audio_metrics" => audio_metrics,
|
422
460
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
423
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
461
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
462
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
463
|
+
"background_audio_suppression" => background_audio_suppression
|
424
464
|
}
|
425
465
|
|
426
466
|
data = audio
|
@@ -439,7 +479,7 @@ module IBMWatson
|
|
439
479
|
end
|
440
480
|
|
441
481
|
##
|
442
|
-
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
482
|
+
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
443
483
|
# Sends audio for speech recognition using web sockets.
|
444
484
|
# @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
|
445
485
|
# @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
|
@@ -449,7 +489,7 @@ module IBMWatson
|
|
449
489
|
# @param customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
450
490
|
# @param acoustic_customization_id [String] The GUID of a custom acoustic model that is to be used with the request. The base model of the specified custom acoustic model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom acoustic model is used.
|
451
491
|
# @param language_customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
452
|
-
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://
|
492
|
+
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
453
493
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in submitted audio, the connection is closed with a 400 error. Useful for stopping audio submission from a live microphone when a user simply walks away. Use `-1` for infinity.
|
454
494
|
# @param interim_results [Boolean] Send back non-final previews of each "sentence" as it is being processed. These results are ignored in text mode.
|
455
495
|
# @param keywords [Array<String>] Array of keyword strings to spot in the audio. Each keyword string can include one or more tokens. Keywords are spotted only in the final hypothesis, not in interim results. If you specify any keywords, you must also specify a keywords threshold. Omit the parameter or specify an empty array if you do not need to spot keywords.
|
@@ -460,13 +500,13 @@ module IBMWatson
|
|
460
500
|
# @param timestamps [Boolean] If `true`, time alignment for each word is returned.
|
461
501
|
# @param profanity_filter [Boolean] If `true` (the default), filters profanity from all output except for keyword results by replacing inappropriate words with a series of asterisks. Set the parameter to `false` to return results with no censoring. Applies to US English transcription only.
|
462
502
|
# @param smart_formatting [Boolean] If `true`, converts dates, times, series of digits and numbers, phone numbers, currency values, and Internet addresses into more readable, conventional representations in the final transcript of a recognition request. If `false` (the default), no formatting is performed. Applies to US English transcription only.
|
463
|
-
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://
|
503
|
+
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
464
504
|
# @param grammar_name [String] The name of a grammar that is to be used with the recognition request. If you
|
465
505
|
# specify a grammar, you must also use the `language_customization_id` parameter to
|
466
506
|
# specify the name of the custom language model for which the grammar is defined.
|
467
507
|
# The service recognizes only strings that are recognized by the specified grammar;
|
468
508
|
# it does not recognize other custom words from the model's words resource. See
|
469
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
509
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
|
470
510
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
471
511
|
# feature redacts any number that has three or more consecutive digits by replacing
|
472
512
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -481,7 +521,7 @@ module IBMWatson
|
|
481
521
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
482
522
|
#
|
483
523
|
# See [Numeric
|
484
|
-
# redaction](https://cloud.ibm.com/docs/
|
524
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
485
525
|
#
|
486
526
|
# @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
|
487
527
|
# input audio. The service returns processing metrics at the interval specified by
|
@@ -503,7 +543,7 @@ module IBMWatson
|
|
503
543
|
# @return [WebSocketClient] Returns a new WebSocketClient object
|
504
544
|
#
|
505
545
|
# See [Audio
|
506
|
-
# metrics](https://cloud.ibm.com/docs/
|
546
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
507
547
|
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
508
548
|
# splits a transcript into multiple final results. If the service detects pauses or
|
509
549
|
# extended silence before it reaches the end of the audio stream, its response can
|
@@ -520,7 +560,7 @@ module IBMWatson
|
|
520
560
|
# Chinese is 0.6 seconds.
|
521
561
|
#
|
522
562
|
# See [End of phrase silence
|
523
|
-
# time](https://cloud.ibm.com/docs/
|
563
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
524
564
|
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
525
565
|
# based on semantic features of the input, for example, at the conclusion of
|
526
566
|
# meaningful phrases such as sentences. The service bases its understanding of
|
@@ -530,7 +570,33 @@ module IBMWatson
|
|
530
570
|
# interval.
|
531
571
|
#
|
532
572
|
# See [Split transcript at phrase
|
533
|
-
# end](https://cloud.ibm.com/docs/
|
573
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
574
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
575
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
576
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
577
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
578
|
+
# activity.
|
579
|
+
#
|
580
|
+
# Specify a value between 0.0 and 1.0:
|
581
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
582
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
583
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
584
|
+
#
|
585
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
586
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
587
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
588
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
589
|
+
# conversations or background noise.
|
590
|
+
#
|
591
|
+
# Specify a value in the range of 0.0 to 1.0:
|
592
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
593
|
+
# disabled).
|
594
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
595
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
596
|
+
#
|
597
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
598
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
599
|
+
# @return [WebSocketClient] A new `WebSocketClient` object created from the supplied audio, options, and recognize callback.
|
534
600
|
def recognize_using_websocket(
|
535
601
|
content_type: nil,
|
536
602
|
recognize_callback:,
|
@@ -559,7 +625,9 @@ module IBMWatson
|
|
559
625
|
processing_metrics_interval: nil,
|
560
626
|
audio_metrics: nil,
|
561
627
|
end_of_phrase_silence_time: nil,
|
562
|
-
split_transcript_at_phrase_end: nil
|
628
|
+
split_transcript_at_phrase_end: nil,
|
629
|
+
speech_detector_sensitivity: nil,
|
630
|
+
background_audio_suppression: nil
|
563
631
|
)
|
564
632
|
raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
|
565
633
|
raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
|
@@ -599,7 +667,9 @@ module IBMWatson
|
|
599
667
|
"processing_metrics_interval" => processing_metrics_interval,
|
600
668
|
"audio_metrics" => audio_metrics,
|
601
669
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
602
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
670
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
671
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
672
|
+
"background_audio_suppression" => background_audio_suppression
|
603
673
|
}
|
604
674
|
options.delete_if { |_, v| v.nil? }
|
605
675
|
WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
|
@@ -612,9 +682,9 @@ module IBMWatson
|
|
612
682
|
# @!method register_callback(callback_url:, user_secret: nil)
|
613
683
|
# Register a callback.
|
614
684
|
# Registers a callback URL with the service for use with subsequent asynchronous
|
615
|
-
# recognition requests. The service attempts to register, or
|
616
|
-
#
|
617
|
-
#
|
685
|
+
# recognition requests. The service attempts to register, or allowlist, the callback
|
686
|
+
# URL if it is not already registered by sending a `GET` request to the callback
|
687
|
+
# URL. The service passes a random alphanumeric challenge string via the
|
618
688
|
# `challenge_string` parameter of the request. The request includes an `Accept`
|
619
689
|
# header that specifies `text/plain` as the required response type.
|
620
690
|
#
|
@@ -626,9 +696,9 @@ module IBMWatson
|
|
626
696
|
#
|
627
697
|
# The service sends only a single `GET` request to the callback URL. If the service
|
628
698
|
# does not receive a reply with a response code of 200 and a body that echoes the
|
629
|
-
# challenge string sent by the service within five seconds, it does not
|
699
|
+
# challenge string sent by the service within five seconds, it does not allowlist
|
630
700
|
# the URL; it instead sends status code 400 in response to the **Register a
|
631
|
-
# callback** request. If the requested callback URL is already
|
701
|
+
# callback** request. If the requested callback URL is already allowlisted, the
|
632
702
|
# service responds to the initial registration request with response code 200.
|
633
703
|
#
|
634
704
|
# If you specify a user secret with the request, the service uses it as a key to
|
@@ -646,7 +716,7 @@ module IBMWatson
|
|
646
716
|
# **See also:** [Registering a callback
|
647
717
|
# URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#register).
|
648
718
|
# @param callback_url [String] An HTTP or HTTPS URL to which callback notifications are to be sent. To be
|
649
|
-
#
|
719
|
+
# allowlisted, the URL must successfully echo the challenge string during URL
|
650
720
|
# verification. During verification, the client can also check the signature that
|
651
721
|
# the service sends in the `X-Callback-Signature` header to verify the origin of the
|
652
722
|
# request.
|
@@ -684,7 +754,7 @@ module IBMWatson
|
|
684
754
|
##
|
685
755
|
# @!method unregister_callback(callback_url:)
|
686
756
|
# Unregister a callback.
|
687
|
-
# Unregisters a callback URL that was previously
|
757
|
+
# Unregisters a callback URL that was previously allowlisted with a **Register a
|
688
758
|
# callback** request for use with the asynchronous interface. Once unregistered, the
|
689
759
|
# URL can no longer be used with asynchronous recognition requests.
|
690
760
|
#
|
@@ -717,7 +787,7 @@ module IBMWatson
|
|
717
787
|
end
|
718
788
|
|
719
789
|
##
|
720
|
-
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
790
|
+
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
721
791
|
# Create a job.
|
722
792
|
# Creates a job for a new asynchronous recognition request. The job is owned by the
|
723
793
|
# instance of the service whose credentials are used to create it. How you learn the
|
@@ -815,14 +885,14 @@ module IBMWatson
|
|
815
885
|
#
|
816
886
|
# **See also:** [Audio
|
817
887
|
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
818
|
-
# @param audio [
|
888
|
+
# @param audio [File] The audio to transcribe.
|
819
889
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
820
890
|
# audio format, see **Audio formats (content types)** in the method description.
|
821
891
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
822
892
|
# [Languages and
|
823
893
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
824
894
|
# @param callback_url [String] A URL to which callback notifications are to be sent. The URL must already be
|
825
|
-
# successfully
|
895
|
+
# successfully allowlisted by using the **Register a callback** method. You can
|
826
896
|
# include the same callback URL with any number of job creation requests. Omit the
|
827
897
|
# parameter to poll the service for job completion and results.
|
828
898
|
#
|
@@ -903,8 +973,14 @@ module IBMWatson
|
|
903
973
|
# @param keywords [Array<String>] An array of keyword strings to spot in the audio. Each keyword string can include
|
904
974
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
905
975
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
906
|
-
# threshold.
|
907
|
-
#
|
976
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
977
|
+
# keywords.
|
978
|
+
#
|
979
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
980
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
981
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
982
|
+
#
|
983
|
+
# See [Keyword
|
908
984
|
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
909
985
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
910
986
|
# considered to match a keyword if its confidence is greater than or equal to the
|
@@ -949,11 +1025,9 @@ module IBMWatson
|
|
949
1025
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
950
1026
|
# parameter.
|
951
1027
|
#
|
952
|
-
# **Note:** Applies to US English,
|
953
|
-
# narrowband models) and UK English (narrowband model)
|
954
|
-
#
|
955
|
-
# **Get a model** method and check that the attribute `speaker_labels` is set to
|
956
|
-
# `true`.
|
1028
|
+
# **Note:** Applies to US English, Australian English, German, Japanese, Korean, and
|
1029
|
+
# Spanish (both broadband and narrowband models) and UK English (narrowband model)
|
1030
|
+
# transcription only.
|
957
1031
|
#
|
958
1032
|
# See [Speaker
|
959
1033
|
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
@@ -1036,8 +1110,33 @@ module IBMWatson
|
|
1036
1110
|
#
|
1037
1111
|
# See [Split transcript at phrase
|
1038
1112
|
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
1113
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
1114
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
1115
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
1116
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
1117
|
+
# activity.
|
1118
|
+
#
|
1119
|
+
# Specify a value between 0.0 and 1.0:
|
1120
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
1121
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
1122
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
1123
|
+
#
|
1124
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1125
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
1126
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
1127
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
1128
|
+
# conversations or background noise.
|
1129
|
+
#
|
1130
|
+
# Specify a value in the range of 0.0 to 1.0:
|
1131
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
1132
|
+
# disabled).
|
1133
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
1134
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
1135
|
+
#
|
1136
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1137
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
1039
1138
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1040
|
-
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
|
1139
|
+
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
1041
1140
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
1042
1141
|
|
1043
1142
|
headers = {
|
@@ -1074,7 +1173,9 @@ module IBMWatson
|
|
1074
1173
|
"processing_metrics_interval" => processing_metrics_interval,
|
1075
1174
|
"audio_metrics" => audio_metrics,
|
1076
1175
|
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
1077
|
-
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end
|
1176
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
1177
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
1178
|
+
"background_audio_suppression" => background_audio_suppression
|
1078
1179
|
}
|
1079
1180
|
|
1080
1181
|
data = audio
|
@@ -1291,8 +1392,12 @@ module IBMWatson
|
|
1291
1392
|
# **See also:** [Listing custom language
|
1292
1393
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
|
1293
1394
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
1294
|
-
# are to be returned
|
1295
|
-
#
|
1395
|
+
# are to be returned. Omit the parameter to see all custom language or custom
|
1396
|
+
# acoustic models that are owned by the requesting credentials.
|
1397
|
+
#
|
1398
|
+
# To determine the languages for which customization is available, see [Language
|
1399
|
+
# support for
|
1400
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
1296
1401
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1297
1402
|
def list_language_models(language: nil)
|
1298
1403
|
headers = {
|
@@ -1600,18 +1705,20 @@ module IBMWatson
|
|
1600
1705
|
#
|
1601
1706
|
# The call returns an HTTP 201 response code if the corpus is valid. The service
|
1602
1707
|
# then asynchronously processes the contents of the corpus and automatically
|
1603
|
-
# extracts new words that it finds. This can take on the order of
|
1604
|
-
# complete depending on the total number of words and the number of new words in
|
1605
|
-
# corpus, as well as the current load on the service. You cannot submit requests
|
1606
|
-
# add additional resources to the custom model or to train the model until the
|
1708
|
+
# extracts new words that it finds. This operation can take on the order of minutes
|
1709
|
+
# to complete depending on the total number of words and the number of new words in
|
1710
|
+
# the corpus, as well as the current load on the service. You cannot submit requests
|
1711
|
+
# to add additional resources to the custom model or to train the model until the
|
1607
1712
|
# service's analysis of the corpus for the current request completes. Use the **List
|
1608
1713
|
# a corpus** method to check the status of the analysis.
|
1609
1714
|
#
|
1610
1715
|
# The service auto-populates the model's words resource with words from the corpus
|
1611
|
-
# that are not found in its base vocabulary. These are referred to as
|
1612
|
-
# out-of-vocabulary (OOV) words.
|
1613
|
-
#
|
1614
|
-
#
|
1716
|
+
# that are not found in its base vocabulary. These words are referred to as
|
1717
|
+
# out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
|
1718
|
+
# resource to ensure that each OOV word's definition is complete and valid. You can
|
1719
|
+
# use the **List custom words** method to examine the words resource. You can use
|
1720
|
+
# other words methods to eliminate typos and modify how words are pronounced as
|
1721
|
+
# needed.
|
1615
1722
|
#
|
1616
1723
|
# To add a corpus file that has the same name as an existing corpus, set the
|
1617
1724
|
# `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
|
@@ -1628,10 +1735,12 @@ module IBMWatson
|
|
1628
1735
|
# directly.
|
1629
1736
|
#
|
1630
1737
|
# **See also:**
|
1738
|
+
# * [Add a corpus to the custom language
|
1739
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
|
1631
1740
|
# * [Working with
|
1632
1741
|
# corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
|
1633
|
-
# * [
|
1634
|
-
#
|
1742
|
+
# * [Validating a words
|
1743
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1635
1744
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1636
1745
|
# the request. You must make the request with credentials for the instance of the
|
1637
1746
|
# service that owns the custom model.
|
@@ -1860,7 +1969,10 @@ module IBMWatson
|
|
1860
1969
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1861
1970
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1862
1971
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1863
|
-
# word.
|
1972
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
1973
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
1974
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
1975
|
+
# valid.
|
1864
1976
|
# * The `display_as` field provides a different way of spelling the word in a
|
1865
1977
|
# transcript. Use the parameter when you want the word to appear different from its
|
1866
1978
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1890,10 +2002,12 @@ module IBMWatson
|
|
1890
2002
|
#
|
1891
2003
|
#
|
1892
2004
|
# **See also:**
|
2005
|
+
# * [Add words to the custom language
|
2006
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
1893
2007
|
# * [Working with custom
|
1894
2008
|
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1895
|
-
# * [
|
1896
|
-
#
|
2009
|
+
# * [Validating a words
|
2010
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1897
2011
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1898
2012
|
# the request. You must make the request with credentials for the instance of the
|
1899
2013
|
# service that owns the custom model.
|
@@ -1949,7 +2063,10 @@ module IBMWatson
|
|
1949
2063
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1950
2064
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1951
2065
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1952
|
-
# word.
|
2066
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
2067
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
2068
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
2069
|
+
# valid.
|
1953
2070
|
# * The `display_as` field provides a different way of spelling the word in a
|
1954
2071
|
# transcript. Use the parameter when you want the word to appear different from its
|
1955
2072
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1961,10 +2078,12 @@ module IBMWatson
|
|
1961
2078
|
# the **List a custom word** method to review the word that you add.
|
1962
2079
|
#
|
1963
2080
|
# **See also:**
|
2081
|
+
# * [Add words to the custom language
|
2082
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
1964
2083
|
# * [Working with custom
|
1965
2084
|
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1966
|
-
# * [
|
1967
|
-
#
|
2085
|
+
# * [Validating a words
|
2086
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1968
2087
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1969
2088
|
# the request. You must make the request with credentials for the instance of the
|
1970
2089
|
# service that owns the custom model.
|
@@ -2148,12 +2267,12 @@ module IBMWatson
|
|
2148
2267
|
#
|
2149
2268
|
# The call returns an HTTP 201 response code if the grammar is valid. The service
|
2150
2269
|
# then asynchronously processes the contents of the grammar and automatically
|
2151
|
-
# extracts new words that it finds. This can take a few seconds
|
2152
|
-
# depending on the size and complexity of the grammar, as well as the
|
2153
|
-
# on the service. You cannot submit requests to add additional
|
2154
|
-
# custom model or to train the model until the service's analysis
|
2155
|
-
# the current request completes. Use the **Get a grammar** method
|
2156
|
-
# status of the analysis.
|
2270
|
+
# extracts new words that it finds. This operation can take a few seconds or minutes
|
2271
|
+
# to complete depending on the size and complexity of the grammar, as well as the
|
2272
|
+
# current load on the service. You cannot submit requests to add additional
|
2273
|
+
# resources to the custom model or to train the model until the service's analysis
|
2274
|
+
# of the grammar for the current request completes. Use the **Get a grammar** method
|
2275
|
+
# to check the status of the analysis.
|
2157
2276
|
#
|
2158
2277
|
# The service populates the model's words resource with any word that is recognized
|
2159
2278
|
# by the grammar that is not found in the model's base vocabulary. These are
|
@@ -2397,8 +2516,12 @@ module IBMWatson
|
|
2397
2516
|
# **See also:** [Listing custom acoustic
|
2398
2517
|
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
|
2399
2518
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
2400
|
-
# are to be returned
|
2401
|
-
#
|
2519
|
+
# are to be returned. Omit the parameter to see all custom language or custom
|
2520
|
+
# acoustic models that are owned by the requesting credentials.
|
2521
|
+
#
|
2522
|
+
# To determine the languages for which customization is available, see [Language
|
2523
|
+
# support for
|
2524
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
2402
2525
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2403
2526
|
def list_acoustic_models(language: nil)
|
2404
2527
|
headers = {
|
@@ -2496,14 +2619,14 @@ module IBMWatson
|
|
2496
2619
|
# it. You must use credentials for the instance of the service that owns a model to
|
2497
2620
|
# train it.
|
2498
2621
|
#
|
2499
|
-
# The training method is asynchronous.
|
2500
|
-
#
|
2501
|
-
#
|
2502
|
-
# training a custom acoustic model takes
|
2503
|
-
#
|
2504
|
-
#
|
2505
|
-
# returns an HTTP 200 response code to indicate that the training
|
2506
|
-
#
|
2622
|
+
# The training method is asynchronous. Training time depends on the cumulative
|
2623
|
+
# amount of audio data that the custom acoustic model contains and the current load
|
2624
|
+
# on the service. When you train or retrain a model, the service uses all of the
|
2625
|
+
# model's audio data in the training. Training a custom acoustic model takes
|
2626
|
+
# approximately as long as the length of its cumulative audio data. For example, it
|
2627
|
+
# takes approximately 2 hours to train a model that contains a total of 2 hours of
|
2628
|
+
# audio. The method returns an HTTP 200 response code to indicate that the training
|
2629
|
+
# process has begun.
|
2507
2630
|
#
|
2508
2631
|
# You can monitor the status of the training by using the **Get a custom acoustic
|
2509
2632
|
# model** method to poll the model's status. Use a loop to check the status once a
|
@@ -2519,8 +2642,9 @@ module IBMWatson
|
|
2519
2642
|
# Train with a custom language model if you have verbatim transcriptions of the
|
2520
2643
|
# audio files that you have added to the custom model or you have either corpora
|
2521
2644
|
# (text files) or a list of words that are relevant to the contents of the audio
|
2522
|
-
# files.
|
2523
|
-
# base model
|
2645
|
+
# files. For training to succeed, both of the custom models must be based on the
|
2646
|
+
# same version of the same base model, and the custom language model must be fully
|
2647
|
+
# trained and available.
|
2524
2648
|
#
|
2525
2649
|
# **See also:**
|
2526
2650
|
# * [Train the custom acoustic
|
@@ -2536,6 +2660,9 @@ module IBMWatson
|
|
2536
2660
|
# another training request or a request to add audio resources to the model.
|
2537
2661
|
# * The custom model contains less than 10 minutes or more than 200 hours of audio
|
2538
2662
|
# data.
|
2663
|
+
# * You passed a custom language model with the `custom_language_model_id` query
|
2664
|
+
# parameter that is not in the available state. A custom language model must be
|
2665
|
+
# fully trained and available to be used to train a custom acoustic model.
|
2539
2666
|
# * You passed an incompatible custom language model with the
|
2540
2667
|
# `custom_language_model_id` query parameter. Both custom models must be based on
|
2541
2668
|
# the same version of the same base model.
|
@@ -2551,8 +2678,8 @@ module IBMWatson
|
|
2551
2678
|
# been trained with verbatim transcriptions of the audio resources or that contains
|
2552
2679
|
# words that are relevant to the contents of the audio resources. The custom
|
2553
2680
|
# language model must be based on the same version of the same base model as the
|
2554
|
-
# custom acoustic model
|
2555
|
-
# custom models.
|
2681
|
+
# custom acoustic model, and the custom language model must be fully trained and
|
2682
|
+
# available. The credentials specified with the request must own both custom models.
|
2556
2683
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2557
2684
|
def train_acoustic_model(customization_id:, custom_language_model_id: nil)
|
2558
2685
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2650,8 +2777,9 @@ module IBMWatson
|
|
2650
2777
|
# service that owns the custom model.
|
2651
2778
|
# @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
|
2652
2779
|
# customization ID (GUID) of that custom language model. The custom language model
|
2653
|
-
# must be upgraded before the custom acoustic model can be upgraded. The
|
2654
|
-
#
|
2780
|
+
# must be upgraded before the custom acoustic model can be upgraded. The custom
|
2781
|
+
# language model must be fully trained and available. The credentials specified with
|
2782
|
+
# the request must own both custom models.
|
2655
2783
|
# @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
|
2656
2784
|
# has been modified since it was last trained. Use this parameter only to force the
|
2657
2785
|
# upgrade of a custom acoustic model that is trained with a custom language model,
|
@@ -2746,14 +2874,14 @@ module IBMWatson
|
|
2746
2874
|
# same name as an existing audio resource, set the `allow_overwrite` parameter to
|
2747
2875
|
# `true`; otherwise, the request fails.
|
2748
2876
|
#
|
2749
|
-
# The method is asynchronous. It can take several seconds to complete
|
2750
|
-
# the duration of the audio and, in the case of an archive file, the
|
2751
|
-
# audio files being processed. The service returns a 201 response
|
2752
|
-
# is valid. It then asynchronously analyzes the contents of the
|
2753
|
-
# and automatically extracts information about the audio such as
|
2754
|
-
# sampling rate, and encoding. You cannot submit requests to train or
|
2755
|
-
# model until the service's analysis of all audio resources for current
|
2756
|
-
# completes.
|
2877
|
+
# The method is asynchronous. It can take several seconds or minutes to complete
|
2878
|
+
# depending on the duration of the audio and, in the case of an archive file, the
|
2879
|
+
# total number of audio files being processed. The service returns a 201 response
|
2880
|
+
# code if the audio is valid. It then asynchronously analyzes the contents of the
|
2881
|
+
# audio file or files and automatically extracts information about the audio such as
|
2882
|
+
# its length, sampling rate, and encoding. You cannot submit requests to train or
|
2883
|
+
# upgrade the model until the service's analysis of all audio resources for current
|
2884
|
+
# requests completes.
|
2757
2885
|
#
|
2758
2886
|
# To determine the status of the service's analysis of the audio, use the **Get an
|
2759
2887
|
# audio resource** method to poll the status of the audio. The method accepts the
|
@@ -2842,7 +2970,7 @@ module IBMWatson
|
|
2842
2970
|
# used, their use is strongly discouraged.)
|
2843
2971
|
# * Do not use the name of an audio resource that has already been added to the
|
2844
2972
|
# custom model.
|
2845
|
-
# @param audio_resource [
|
2973
|
+
# @param audio_resource [File] The audio resource that is to be added to the custom acoustic model, an individual
|
2846
2974
|
# audio file or an archive file.
|
2847
2975
|
#
|
2848
2976
|
# With the `curl` command, use the `--data-binary` option to upload the file for the
|
@@ -3010,10 +3138,15 @@ module IBMWatson
|
|
3010
3138
|
# deletes all data for the customer ID, regardless of the method by which the
|
3011
3139
|
# information was added. The method has no effect if no data is associated with the
|
3012
3140
|
# customer ID. You must issue the request with credentials for the same instance of
|
3013
|
-
# the service that was used to associate the customer ID with the data.
|
3014
|
-
#
|
3015
|
-
#
|
3016
|
-
#
|
3141
|
+
# the service that was used to associate the customer ID with the data. You
|
3142
|
+
# associate a customer ID with data by passing the `X-Watson-Metadata` header with a
|
3143
|
+
# request that passes the data.
|
3144
|
+
#
|
3145
|
+
# **Note:** If you delete an instance of the service from the service console, all
|
3146
|
+
# data associated with that service instance is automatically deleted. This includes
|
3147
|
+
# all custom language models, corpora, grammars, and words; all custom acoustic
|
3148
|
+
# models and audio resources; all registered endpoints for the asynchronous HTTP
|
3149
|
+
# interface; and all data related to speech recognition requests.
|
3017
3150
|
#
|
3018
3151
|
# **See also:** [Information
|
3019
3152
|
# security](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-information-security#information-security).
|