ibm_watson 1.1.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +33 -5
- data/lib/ibm_watson/assistant_v1.rb +156 -65
- data/lib/ibm_watson/assistant_v2.rb +76 -13
- data/lib/ibm_watson/compare_comply_v1.rb +11 -5
- data/lib/ibm_watson/discovery_v1.rb +19 -9
- data/lib/ibm_watson/discovery_v2.rb +25 -9
- data/lib/ibm_watson/language_translator_v3.rb +27 -13
- data/lib/ibm_watson/natural_language_classifier_v1.rb +10 -4
- data/lib/ibm_watson/natural_language_understanding_v1.rb +20 -10
- data/lib/ibm_watson/personality_insights_v3.rb +17 -11
- data/lib/ibm_watson/speech_to_text_v1.rb +394 -168
- data/lib/ibm_watson/text_to_speech_v1.rb +57 -46
- data/lib/ibm_watson/tone_analyzer_v3.rb +11 -5
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +15 -7
- data/lib/ibm_watson/visual_recognition_v4.rb +199 -4
- data/test/integration/test_assistant_v2.rb +7 -0
- data/test/integration/test_compare_comply_v1.rb +1 -12
- data/test/integration/test_speech_to_text_v1.rb +7 -2
- data/test/integration/test_visual_recognition_v4.rb +9 -0
- data/test/unit/test_assistant_v2.rb +66 -0
- data/test/unit/test_personality_insights_v3.rb +4 -0
- data/test/unit/test_visual_recognition_v4.rb +87 -0
- metadata +5 -6
- data/test/unit/test_vcap_using_personality_insights.rb +0 -161
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# (C) Copyright IBM Corp.
|
3
|
+
# (C) Copyright IBM Corp. 2018, 2020.
|
4
4
|
#
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
6
|
# you may not use this file except in compliance with the License.
|
@@ -31,6 +31,8 @@ module IBMWatson
|
|
31
31
|
# The Natural Language Classifier V1 service.
|
32
32
|
class NaturalLanguageClassifierV1 < IBMCloudSdkCore::BaseService
|
33
33
|
include Concurrent::Async
|
34
|
+
DEFAULT_SERVICE_NAME = "natural_language_classifier"
|
35
|
+
DEFAULT_SERVICE_URL = "https://gateway.watsonplatform.net/natural-language-classifier/api"
|
34
36
|
##
|
35
37
|
# @!method initialize(args)
|
36
38
|
# Construct a new client for the Natural Language Classifier service.
|
@@ -39,15 +41,19 @@ module IBMWatson
|
|
39
41
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
40
42
|
# The base service_url may differ between IBM Cloud regions.
|
41
43
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
44
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
45
|
+
# any external configuration, if applicable.
|
42
46
|
def initialize(args = {})
|
43
47
|
@__async_initialized__ = false
|
44
48
|
defaults = {}
|
45
|
-
defaults[:service_url] =
|
49
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
50
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
46
51
|
defaults[:authenticator] = nil
|
52
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
47
53
|
args = defaults.merge(args)
|
48
|
-
args[:service_name] = "natural_language_classifier"
|
49
54
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
50
55
|
super
|
56
|
+
@service_url = user_service_url unless user_service_url.nil?
|
51
57
|
end
|
52
58
|
|
53
59
|
#########################
|
@@ -141,7 +147,7 @@ module IBMWatson
|
|
141
147
|
# (`pt`), and Spanish (`es`).
|
142
148
|
# @param training_data [File] Training data in CSV format. Each text value must have at least one class. The
|
143
149
|
# data can include up to 3,000 classes and 20,000 records. For details, see [Data
|
144
|
-
# preparation](https://cloud.ibm.com/docs/
|
150
|
+
# preparation](https://cloud.ibm.com/docs/natural-language-classifier?topic=natural-language-classifier-using-your-data).
|
145
151
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
146
152
|
def create_classifier(training_metadata:, training_data:)
|
147
153
|
raise ArgumentError.new("training_metadata must be provided") if training_metadata.nil?
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# (C) Copyright IBM Corp.
|
3
|
+
# (C) Copyright IBM Corp. 2018, 2020.
|
4
4
|
#
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
6
|
# you may not use this file except in compliance with the License.
|
@@ -20,9 +20,9 @@
|
|
20
20
|
# can ignore most advertisements and other unwanted content.
|
21
21
|
#
|
22
22
|
# You can create [custom
|
23
|
-
# models](https://cloud.ibm.com/docs/
|
24
|
-
# with Watson Knowledge Studio to detect custom entities
|
25
|
-
#
|
23
|
+
# models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
|
24
|
+
# with Watson Knowledge Studio to detect custom entities and relations in Natural Language
|
25
|
+
# Understanding.
|
26
26
|
|
27
27
|
require "concurrent"
|
28
28
|
require "erb"
|
@@ -36,6 +36,8 @@ module IBMWatson
|
|
36
36
|
# The Natural Language Understanding V1 service.
|
37
37
|
class NaturalLanguageUnderstandingV1 < IBMCloudSdkCore::BaseService
|
38
38
|
include Concurrent::Async
|
39
|
+
DEFAULT_SERVICE_NAME = "natural_language_understanding"
|
40
|
+
DEFAULT_SERVICE_URL = "https://gateway.watsonplatform.net/natural-language-understanding/api"
|
39
41
|
##
|
40
42
|
# @!method initialize(args)
|
41
43
|
# Construct a new client for the Natural Language Understanding service.
|
@@ -54,19 +56,23 @@ module IBMWatson
|
|
54
56
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
55
57
|
# The base service_url may differ between IBM Cloud regions.
|
56
58
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
59
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
60
|
+
# any external configuration, if applicable.
|
57
61
|
def initialize(args = {})
|
58
62
|
@__async_initialized__ = false
|
59
63
|
defaults = {}
|
60
64
|
defaults[:version] = nil
|
61
|
-
defaults[:service_url] =
|
65
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
66
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
62
67
|
defaults[:authenticator] = nil
|
68
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
63
69
|
args = defaults.merge(args)
|
64
70
|
@version = args[:version]
|
65
71
|
raise ArgumentError.new("version must be provided") if @version.nil?
|
66
72
|
|
67
|
-
args[:service_name] = "natural_language_understanding"
|
68
73
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
69
74
|
super
|
75
|
+
@service_url = user_service_url unless user_service_url.nil?
|
70
76
|
end
|
71
77
|
|
72
78
|
#########################
|
@@ -87,6 +93,10 @@ module IBMWatson
|
|
87
93
|
# - Semantic roles
|
88
94
|
# - Sentiment
|
89
95
|
# - Syntax (Experimental).
|
96
|
+
#
|
97
|
+
# If a language for the input text is not specified with the `language` parameter,
|
98
|
+
# the service [automatically detects the
|
99
|
+
# language](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-detectable-languages).
|
90
100
|
# @param features [Features] Specific features to analyze the document for.
|
91
101
|
# @param text [String] The plain text to analyze. One of the `text`, `html`, or `url` parameters is
|
92
102
|
# required.
|
@@ -96,10 +106,10 @@ module IBMWatson
|
|
96
106
|
# required.
|
97
107
|
# @param clean [Boolean] Set this to `false` to disable webpage cleaning. To learn more about webpage
|
98
108
|
# cleaning, see the [Analyzing
|
99
|
-
# webpages](https://cloud.ibm.com/docs/
|
109
|
+
# webpages](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages)
|
100
110
|
# documentation.
|
101
111
|
# @param xpath [String] An [XPath
|
102
|
-
# query](https://cloud.ibm.com/docs/
|
112
|
+
# query](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-analyzing-webpages#xpath)
|
103
113
|
# to perform on `html` or `url` input. Results of the query will be appended to the
|
104
114
|
# cleaned webpage text before it is analyzed. To analyze only the results of the
|
105
115
|
# XPath query, set the `clean` parameter to `false`.
|
@@ -108,7 +118,7 @@ module IBMWatson
|
|
108
118
|
# @param language [String] ISO 639-1 code that specifies the language of your text. This overrides automatic
|
109
119
|
# language detection. Language support differs depending on the features you include
|
110
120
|
# in your analysis. See [Language
|
111
|
-
# support](https://cloud.ibm.com/docs/
|
121
|
+
# support](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-language-support)
|
112
122
|
# for more information.
|
113
123
|
# @param limit_text_characters [Fixnum] Sets the maximum number of characters that are processed by the service.
|
114
124
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
@@ -157,7 +167,7 @@ module IBMWatson
|
|
157
167
|
# @!method list_models
|
158
168
|
# List models.
|
159
169
|
# Lists Watson Knowledge Studio [custom entities and relations
|
160
|
-
# models](https://cloud.ibm.com/docs/
|
170
|
+
# models](https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-customizing)
|
161
171
|
# that are deployed to your Natural Language Understanding service.
|
162
172
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
163
173
|
def list_models
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# (C) Copyright IBM Corp.
|
3
|
+
# (C) Copyright IBM Corp. 2018, 2020.
|
4
4
|
#
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
6
|
# you may not use this file except in compliance with the License.
|
@@ -26,9 +26,9 @@
|
|
26
26
|
# is timestamped, can report temporal behavior.
|
27
27
|
# * For information about the meaning of the models that the service uses to describe
|
28
28
|
# personality characteristics, see [Personality
|
29
|
-
# models](https://cloud.ibm.com/docs/
|
29
|
+
# models](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-models#models).
|
30
30
|
# * For information about the meaning of the consumption preferences, see [Consumption
|
31
|
-
# preferences](https://cloud.ibm.com/docs/
|
31
|
+
# preferences](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-preferences#preferences).
|
32
32
|
#
|
33
33
|
#
|
34
34
|
# **Note:** Request logging is disabled for the Personality Insights service. Regardless
|
@@ -47,6 +47,8 @@ module IBMWatson
|
|
47
47
|
# The Personality Insights V3 service.
|
48
48
|
class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
|
49
49
|
include Concurrent::Async
|
50
|
+
DEFAULT_SERVICE_NAME = "personality_insights"
|
51
|
+
DEFAULT_SERVICE_URL = "https://gateway.watsonplatform.net/personality-insights/api"
|
50
52
|
##
|
51
53
|
# @!method initialize(args)
|
52
54
|
# Construct a new client for the Personality Insights service.
|
@@ -65,19 +67,23 @@ module IBMWatson
|
|
65
67
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
66
68
|
# The base service_url may differ between IBM Cloud regions.
|
67
69
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
70
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
71
|
+
# any external configuration, if applicable.
|
68
72
|
def initialize(args = {})
|
69
73
|
@__async_initialized__ = false
|
70
74
|
defaults = {}
|
71
75
|
defaults[:version] = nil
|
72
|
-
defaults[:service_url] =
|
76
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
77
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
73
78
|
defaults[:authenticator] = nil
|
79
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
74
80
|
args = defaults.merge(args)
|
75
81
|
@version = args[:version]
|
76
82
|
raise ArgumentError.new("version must be provided") if @version.nil?
|
77
83
|
|
78
|
-
args[:service_name] = "personality_insights"
|
79
84
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
80
85
|
super
|
86
|
+
@service_url = user_service_url unless user_service_url.nil?
|
81
87
|
end
|
82
88
|
|
83
89
|
#########################
|
@@ -95,9 +101,9 @@ module IBMWatson
|
|
95
101
|
#
|
96
102
|
# **See also:**
|
97
103
|
# * [Requesting a
|
98
|
-
# profile](https://cloud.ibm.com/docs/
|
104
|
+
# profile](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-input#input)
|
99
105
|
# * [Providing sufficient
|
100
|
-
# input](https://cloud.ibm.com/docs/
|
106
|
+
# input](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-input#sufficient)
|
101
107
|
#
|
102
108
|
#
|
103
109
|
# ### Content types
|
@@ -115,7 +121,7 @@ module IBMWatson
|
|
115
121
|
# `Content-Type: text/plain;charset=utf-8`.
|
116
122
|
#
|
117
123
|
# **See also:** [Specifying request and response
|
118
|
-
# formats](https://cloud.ibm.com/docs/
|
124
|
+
# formats](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-input#formats)
|
119
125
|
#
|
120
126
|
#
|
121
127
|
# ### Accept types
|
@@ -127,12 +133,12 @@ module IBMWatson
|
|
127
133
|
#
|
128
134
|
# **See also:**
|
129
135
|
# * [Understanding a JSON
|
130
|
-
# profile](https://cloud.ibm.com/docs/
|
136
|
+
# profile](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-output#output)
|
131
137
|
# * [Understanding a CSV
|
132
|
-
# profile](https://cloud.ibm.com/docs/
|
138
|
+
# profile](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-outputCSV#outputCSV).
|
133
139
|
# @param content [Content] A maximum of 20 MB of content to analyze, though the service requires much less
|
134
140
|
# text; for more information, see [Providing sufficient
|
135
|
-
# input](https://cloud.ibm.com/docs/
|
141
|
+
# input](https://cloud.ibm.com/docs/personality-insights?topic=personality-insights-input#sufficient).
|
136
142
|
# For JSON input, provide an object of type `Content`.
|
137
143
|
# @param accept [String] The type of the response. For more information, see **Accept types** in the method
|
138
144
|
# description.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# (C) Copyright IBM Corp.
|
3
|
+
# (C) Copyright IBM Corp. 2018, 2020.
|
4
4
|
#
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
6
|
# you may not use this file except in compliance with the License.
|
@@ -34,9 +34,9 @@
|
|
34
34
|
# is a formal language specification that lets you restrict the phrases that the service
|
35
35
|
# can recognize.
|
36
36
|
#
|
37
|
-
# Language model customization
|
38
|
-
#
|
39
|
-
#
|
37
|
+
# Language model customization and acoustic model customization are generally available
|
38
|
+
# for production use with all language models that are generally available. Grammars are
|
39
|
+
# beta functionality for all language models that support language model customization.
|
40
40
|
|
41
41
|
require "concurrent"
|
42
42
|
require "erb"
|
@@ -50,6 +50,8 @@ module IBMWatson
|
|
50
50
|
# The Speech to Text V1 service.
|
51
51
|
class SpeechToTextV1 < IBMCloudSdkCore::BaseService
|
52
52
|
include Concurrent::Async
|
53
|
+
DEFAULT_SERVICE_NAME = "speech_to_text"
|
54
|
+
DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"
|
53
55
|
##
|
54
56
|
# @!method initialize(args)
|
55
57
|
# Construct a new client for the Speech to Text service.
|
@@ -58,15 +60,19 @@ module IBMWatson
|
|
58
60
|
# @option args service_url [String] The base service URL to use when contacting the service.
|
59
61
|
# The base service_url may differ between IBM Cloud regions.
|
60
62
|
# @option args authenticator [Object] The Authenticator instance to be configured for this service.
|
63
|
+
# @option args service_name [String] The name of the service to configure. Will be used as the key to load
|
64
|
+
# any external configuration, if applicable.
|
61
65
|
def initialize(args = {})
|
62
66
|
@__async_initialized__ = false
|
63
67
|
defaults = {}
|
64
|
-
defaults[:service_url] =
|
68
|
+
defaults[:service_url] = DEFAULT_SERVICE_URL
|
69
|
+
defaults[:service_name] = DEFAULT_SERVICE_NAME
|
65
70
|
defaults[:authenticator] = nil
|
71
|
+
user_service_url = args[:service_url] unless args[:service_url].nil?
|
66
72
|
args = defaults.merge(args)
|
67
|
-
args[:service_name] = "speech_to_text"
|
68
73
|
args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
|
69
74
|
super
|
75
|
+
@service_url = user_service_url unless user_service_url.nil?
|
70
76
|
end
|
71
77
|
|
72
78
|
#########################
|
@@ -81,7 +87,7 @@ module IBMWatson
|
|
81
87
|
# among other things.
|
82
88
|
#
|
83
89
|
# **See also:** [Languages and
|
84
|
-
# models](https://cloud.ibm.com/docs/
|
90
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
85
91
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
86
92
|
def list_models
|
87
93
|
headers = {
|
@@ -108,7 +114,7 @@ module IBMWatson
|
|
108
114
|
# sampling rate in Hertz, among other things.
|
109
115
|
#
|
110
116
|
# **See also:** [Languages and
|
111
|
-
# models](https://cloud.ibm.com/docs/
|
117
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
112
118
|
# @param model_id [String] The identifier of the model in the form of its name from the output of the **Get a
|
113
119
|
# model** method.
|
114
120
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
@@ -135,7 +141,7 @@ module IBMWatson
|
|
135
141
|
#########################
|
136
142
|
|
137
143
|
##
|
138
|
-
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil)
|
144
|
+
# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
139
145
|
# Recognize audio.
|
140
146
|
# Sends audio and returns transcription results for a recognition request. You can
|
141
147
|
# pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
|
@@ -146,7 +152,7 @@ module IBMWatson
|
|
146
152
|
# upload the file for the request.)
|
147
153
|
#
|
148
154
|
# **See also:** [Making a basic HTTP
|
149
|
-
# request](https://cloud.ibm.com/docs/
|
155
|
+
# request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-basic).
|
150
156
|
#
|
151
157
|
#
|
152
158
|
# ### Streaming mode
|
@@ -161,9 +167,9 @@ module IBMWatson
|
|
161
167
|
#
|
162
168
|
# **See also:**
|
163
169
|
# * [Audio
|
164
|
-
# transmission](https://cloud.ibm.com/docs/
|
170
|
+
# transmission](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#transmission)
|
165
171
|
# *
|
166
|
-
# [Timeouts](https://cloud.ibm.com/docs/
|
172
|
+
# [Timeouts](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts)
|
167
173
|
#
|
168
174
|
#
|
169
175
|
# ### Audio formats (content types)
|
@@ -203,7 +209,7 @@ module IBMWatson
|
|
203
209
|
# fails.
|
204
210
|
#
|
205
211
|
# **See also:** [Audio
|
206
|
-
# formats](https://cloud.ibm.com/docs/
|
212
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
207
213
|
#
|
208
214
|
#
|
209
215
|
# ### Multipart speech recognition
|
@@ -222,19 +228,19 @@ module IBMWatson
|
|
222
228
|
# want to spot a very large number of keywords.
|
223
229
|
#
|
224
230
|
# **See also:** [Making a multipart HTTP
|
225
|
-
# request](https://cloud.ibm.com/docs/
|
231
|
+
# request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi).
|
226
232
|
# @param audio [String] The audio to transcribe.
|
227
233
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
228
234
|
# audio format, see **Audio formats (content types)** in the method description.
|
229
235
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
230
236
|
# [Languages and
|
231
|
-
# models](https://cloud.ibm.com/docs/
|
237
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
232
238
|
# @param language_customization_id [String] The customization ID (GUID) of a custom language model that is to be used with the
|
233
239
|
# recognition request. The base model of the specified custom language model must
|
234
240
|
# match the model specified with the `model` parameter. You must make the request
|
235
241
|
# with credentials for the instance of the service that owns the custom model. By
|
236
242
|
# default, no custom language model is used. See [Custom
|
237
|
-
# models](https://cloud.ibm.com/docs/
|
243
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
238
244
|
#
|
239
245
|
#
|
240
246
|
# **Note:** Use this parameter instead of the deprecated `customization_id`
|
@@ -244,13 +250,13 @@ module IBMWatson
|
|
244
250
|
# match the model specified with the `model` parameter. You must make the request
|
245
251
|
# with credentials for the instance of the service that owns the custom model. By
|
246
252
|
# default, no custom acoustic model is used. See [Custom
|
247
|
-
# models](https://cloud.ibm.com/docs/
|
253
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
248
254
|
# @param base_model_version [String] The version of the specified base model that is to be used with the recognition
|
249
255
|
# request. Multiple versions of a base model can exist when a model is updated for
|
250
256
|
# internal improvements. The parameter is intended primarily for use with custom
|
251
257
|
# models that have been upgraded for a new base model. The default value depends on
|
252
258
|
# whether the parameter is used with or without a custom model. See [Base model
|
253
|
-
# version](https://cloud.ibm.com/docs/
|
259
|
+
# version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
254
260
|
# @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
|
255
261
|
# recognition request, the customization weight tells the service how much weight to
|
256
262
|
# give to words from the custom language model compared to those from the base model
|
@@ -268,45 +274,51 @@ module IBMWatson
|
|
268
274
|
# phrases.
|
269
275
|
#
|
270
276
|
# See [Custom
|
271
|
-
# models](https://cloud.ibm.com/docs/
|
277
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
272
278
|
# @param inactivity_timeout [Fixnum] The time in seconds after which, if only silence (no speech) is detected in
|
273
279
|
# streaming audio, the connection is closed with a 400 error. The parameter is
|
274
280
|
# useful for stopping audio submission from a live microphone when a user simply
|
275
281
|
# walks away. Use `-1` for infinity. See [Inactivity
|
276
|
-
# timeout](https://cloud.ibm.com/docs/
|
282
|
+
# timeout](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts-inactivity).
|
277
283
|
# @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
|
278
284
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
279
285
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
280
|
-
# threshold.
|
281
|
-
#
|
282
|
-
#
|
286
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
287
|
+
# keywords.
|
288
|
+
#
|
289
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
290
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
291
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
292
|
+
#
|
293
|
+
# See [Keyword
|
294
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
283
295
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
284
296
|
# considered to match a keyword if its confidence is greater than or equal to the
|
285
297
|
# threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
|
286
298
|
# you must also specify one or more keywords. The service performs no keyword
|
287
299
|
# spotting if you omit either parameter. See [Keyword
|
288
|
-
# spotting](https://cloud.ibm.com/docs/
|
300
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
289
301
|
# @param max_alternatives [Fixnum] The maximum number of alternative transcripts that the service is to return. By
|
290
302
|
# default, the service returns a single transcript. If you specify a value of `0`,
|
291
303
|
# the service uses the default value, `1`. See [Maximum
|
292
|
-
# alternatives](https://cloud.ibm.com/docs/
|
304
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#max_alternatives).
|
293
305
|
# @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
|
294
306
|
# possible word alternative (also known as "Confusion Networks"). An alternative
|
295
307
|
# word is considered if its confidence is greater than or equal to the threshold.
|
296
308
|
# Specify a probability between 0.0 and 1.0. By default, the service computes no
|
297
309
|
# alternative words. See [Word
|
298
|
-
# alternatives](https://cloud.ibm.com/docs/
|
310
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_alternatives).
|
299
311
|
# @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
|
300
312
|
# each word. By default, the service returns no word confidence scores. See [Word
|
301
|
-
# confidence](https://cloud.ibm.com/docs/
|
313
|
+
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_confidence).
|
302
314
|
# @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
|
303
315
|
# timestamps are returned. See [Word
|
304
|
-
# timestamps](https://cloud.ibm.com/docs/
|
316
|
+
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_timestamps).
|
305
317
|
# @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
|
306
318
|
# results by replacing inappropriate words with a series of asterisks. Set the
|
307
319
|
# parameter to `false` to return results with no censoring. Applies to US English
|
308
320
|
# transcription only. See [Profanity
|
309
|
-
# filtering](https://cloud.ibm.com/docs/
|
321
|
+
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#profanity_filter).
|
310
322
|
# @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
|
311
323
|
# numbers, currency values, and internet addresses into more readable, conventional
|
312
324
|
# representations in the final transcript of a recognition request. For US English,
|
@@ -316,21 +328,21 @@ module IBMWatson
|
|
316
328
|
# **Note:** Applies to US English, Japanese, and Spanish transcription only.
|
317
329
|
#
|
318
330
|
# See [Smart
|
319
|
-
# formatting](https://cloud.ibm.com/docs/
|
331
|
+
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#smart_formatting).
|
320
332
|
# @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
|
321
333
|
# which participants in a multi-person exchange. By default, the service returns no
|
322
334
|
# speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
|
323
335
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
324
336
|
# parameter.
|
325
337
|
#
|
326
|
-
# **Note:** Applies to US English, Japanese, and Spanish (both
|
327
|
-
# narrowband models) and UK English (narrowband model) transcription
|
328
|
-
# determine whether a language model supports speaker labels, you can also
|
329
|
-
# **Get a model** method and check that the attribute `speaker_labels` is
|
330
|
-
# `true`.
|
338
|
+
# **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
|
339
|
+
# broadband and narrowband models) and UK English (narrowband model) transcription
|
340
|
+
# only. To determine whether a language model supports speaker labels, you can also
|
341
|
+
# use the **Get a model** method and check that the attribute `speaker_labels` is
|
342
|
+
# set to `true`.
|
331
343
|
#
|
332
344
|
# See [Speaker
|
333
|
-
# labels](https://cloud.ibm.com/docs/
|
345
|
+
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
334
346
|
# @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
|
335
347
|
# customization ID (GUID) of a custom language model that is to be used with the
|
336
348
|
# recognition request. Do not specify both parameters with a request.
|
@@ -339,7 +351,7 @@ module IBMWatson
|
|
339
351
|
# specify the name of the custom language model for which the grammar is defined.
|
340
352
|
# The service recognizes only strings that are recognized by the specified grammar;
|
341
353
|
# it does not recognize other custom words from the model's words resource. See
|
342
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
354
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
|
343
355
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
344
356
|
# feature redacts any number that has three or more consecutive digits by replacing
|
345
357
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -354,12 +366,67 @@ module IBMWatson
|
|
354
366
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
355
367
|
#
|
356
368
|
# See [Numeric
|
357
|
-
# redaction](https://cloud.ibm.com/docs/
|
369
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
358
370
|
# @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
|
359
371
|
# input audio. The service returns audio metrics with the final transcription
|
360
372
|
# results. By default, the service returns no audio metrics.
|
373
|
+
#
|
374
|
+
# See [Audio
|
375
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
376
|
+
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
377
|
+
# splits a transcript into multiple final results. If the service detects pauses or
|
378
|
+
# extended silence before it reaches the end of the audio stream, its response can
|
379
|
+
# include multiple final results. Silence indicates a point at which the speaker
|
380
|
+
# pauses between spoken words or phrases.
|
381
|
+
#
|
382
|
+
# Specify a value for the pause interval in the range of 0.0 to 120.0.
|
383
|
+
# * A value greater than 0 specifies the interval that the service is to use for
|
384
|
+
# speech recognition.
|
385
|
+
# * A value of 0 indicates that the service is to use the default interval. It is
|
386
|
+
# equivalent to omitting the parameter.
|
387
|
+
#
|
388
|
+
# The default pause interval for most languages is 0.8 seconds; the default for
|
389
|
+
# Chinese is 0.6 seconds.
|
390
|
+
#
|
391
|
+
# See [End of phrase silence
|
392
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
393
|
+
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
394
|
+
# based on semantic features of the input, for example, at the conclusion of
|
395
|
+
# meaningful phrases such as sentences. The service bases its understanding of
|
396
|
+
# semantic features on the base language model that you use with a request. Custom
|
397
|
+
# language models and grammars can also influence how and where the service splits a
|
398
|
+
# transcript. By default, the service splits transcripts based solely on the pause
|
399
|
+
# interval.
|
400
|
+
#
|
401
|
+
# See [Split transcript at phrase
|
402
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
403
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
404
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
405
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
406
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
407
|
+
# activity.
|
408
|
+
#
|
409
|
+
# Specify a value between 0.0 and 1.0:
|
410
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
411
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
412
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
413
|
+
#
|
414
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
415
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
416
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
417
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
418
|
+
# conversations or background noise.
|
419
|
+
#
|
420
|
+
# Specify a value in the range of 0.0 to 1.0:
|
421
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
422
|
+
# disabled).
|
423
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
424
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
425
|
+
#
|
426
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
427
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
361
428
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
362
|
-
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil)
|
429
|
+
def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
363
430
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
364
431
|
|
365
432
|
headers = {
|
@@ -388,7 +455,11 @@ module IBMWatson
|
|
388
455
|
"customization_id" => customization_id,
|
389
456
|
"grammar_name" => grammar_name,
|
390
457
|
"redaction" => redaction,
|
391
|
-
"audio_metrics" => audio_metrics
|
458
|
+
"audio_metrics" => audio_metrics,
|
459
|
+
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
460
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
461
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
462
|
+
"background_audio_suppression" => background_audio_suppression
|
392
463
|
}
|
393
464
|
|
394
465
|
data = audio
|
@@ -407,7 +478,7 @@ module IBMWatson
|
|
407
478
|
end
|
408
479
|
|
409
480
|
##
|
410
|
-
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil)
|
481
|
+
# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
411
482
|
# Sends audio for speech recognition using web sockets.
|
412
483
|
# @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
|
413
484
|
# @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
|
@@ -417,7 +488,7 @@ module IBMWatson
|
|
417
488
|
# @param customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
418
489
|
# @param acoustic_customization_id [String] The GUID of a custom acoustic model that is to be used with the request. The base model of the specified custom acoustic model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom acoustic model is used.
|
419
490
|
# @param language_customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
|
420
|
-
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://
|
491
|
+
# @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
421
492
|
# @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in submitted audio, the connection is closed with a 400 error. Useful for stopping audio submission from a live microphone when a user simply walks away. Use `-1` for infinity.
|
422
493
|
# @param interim_results [Boolean] Send back non-final previews of each "sentence" as it is being processed. These results are ignored in text mode.
|
423
494
|
# @param keywords [Array<String>] Array of keyword strings to spot in the audio. Each keyword string can include one or more tokens. Keywords are spotted only in the final hypothesis, not in interim results. If you specify any keywords, you must also specify a keywords threshold. Omit the parameter or specify an empty array if you do not need to spot keywords.
|
@@ -428,13 +499,13 @@ module IBMWatson
|
|
428
499
|
# @param timestamps [Boolean] If `true`, time alignment for each word is returned.
|
429
500
|
# @param profanity_filter [Boolean] If `true` (the default), filters profanity from all output except for keyword results by replacing inappropriate words with a series of asterisks. Set the parameter to `false` to return results with no censoring. Applies to US English transcription only.
|
430
501
|
# @param smart_formatting [Boolean] If `true`, converts dates, times, series of digits and numbers, phone numbers, currency values, and Internet addresses into more readable, conventional representations in the final transcript of a recognition request. If `false` (the default), no formatting is performed. Applies to US English transcription only.
|
431
|
-
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://
|
502
|
+
# @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter. To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
432
503
|
# @param grammar_name [String] The name of a grammar that is to be used with the recognition request. If you
|
433
504
|
# specify a grammar, you must also use the `language_customization_id` parameter to
|
434
505
|
# specify the name of the custom language model for which the grammar is defined.
|
435
506
|
# The service recognizes only strings that are recognized by the specified grammar;
|
436
507
|
# it does not recognize other custom words from the model's words resource. See
|
437
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
508
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
|
438
509
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
439
510
|
# feature redacts any number that has three or more consecutive digits by replacing
|
440
511
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -449,7 +520,7 @@ module IBMWatson
|
|
449
520
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
450
521
|
#
|
451
522
|
# See [Numeric
|
452
|
-
# redaction](https://cloud.ibm.com/docs/
|
523
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
453
524
|
#
|
454
525
|
# @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
|
455
526
|
# input audio. The service returns processing metrics at the interval specified by
|
@@ -469,6 +540,62 @@ module IBMWatson
|
|
469
540
|
# input audio. The service returns audio metrics with the final transcription
|
470
541
|
# results. By default, the service returns no audio metrics.
|
471
542
|
# @return [WebSocketClient] Returns a new WebSocketClient object
|
543
|
+
#
|
544
|
+
# See [Audio
|
545
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
546
|
+
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
547
|
+
# splits a transcript into multiple final results. If the service detects pauses or
|
548
|
+
# extended silence before it reaches the end of the audio stream, its response can
|
549
|
+
# include multiple final results. Silence indicates a point at which the speaker
|
550
|
+
# pauses between spoken words or phrases.
|
551
|
+
#
|
552
|
+
# Specify a value for the pause interval in the range of 0.0 to 120.0.
|
553
|
+
# * A value greater than 0 specifies the interval that the service is to use for
|
554
|
+
# speech recognition.
|
555
|
+
# * A value of 0 indicates that the service is to use the default interval. It is
|
556
|
+
# equivalent to omitting the parameter.
|
557
|
+
#
|
558
|
+
# The default pause interval for most languages is 0.8 seconds; the default for
|
559
|
+
# Chinese is 0.6 seconds.
|
560
|
+
#
|
561
|
+
# See [End of phrase silence
|
562
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
563
|
+
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
564
|
+
# based on semantic features of the input, for example, at the conclusion of
|
565
|
+
# meaningful phrases such as sentences. The service bases its understanding of
|
566
|
+
# semantic features on the base language model that you use with a request. Custom
|
567
|
+
# language models and grammars can also influence how and where the service splits a
|
568
|
+
# transcript. By default, the service splits transcripts based solely on the pause
|
569
|
+
# interval.
|
570
|
+
#
|
571
|
+
# See [Split transcript at phrase
|
572
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
573
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
574
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
575
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
576
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
577
|
+
# activity.
|
578
|
+
#
|
579
|
+
# Specify a value between 0.0 and 1.0:
|
580
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
581
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
582
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
583
|
+
#
|
584
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
585
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
586
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
587
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
588
|
+
# conversations or background noise.
|
589
|
+
#
|
590
|
+
# Specify a value in the range of 0.0 to 1.0:
|
591
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
592
|
+
# disabled).
|
593
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
594
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
595
|
+
#
|
596
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
597
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
598
|
+
# @return [WebSocketClient] Returns a new WebSocketClient object
|
472
599
|
def recognize_using_websocket(
|
473
600
|
content_type: nil,
|
474
601
|
recognize_callback:,
|
@@ -495,7 +622,11 @@ module IBMWatson
|
|
495
622
|
redaction: nil,
|
496
623
|
processing_metrics: nil,
|
497
624
|
processing_metrics_interval: nil,
|
498
|
-
audio_metrics: nil
|
625
|
+
audio_metrics: nil,
|
626
|
+
end_of_phrase_silence_time: nil,
|
627
|
+
split_transcript_at_phrase_end: nil,
|
628
|
+
speech_detector_sensitivity: nil,
|
629
|
+
background_audio_suppression: nil
|
499
630
|
)
|
500
631
|
raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
|
501
632
|
raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
|
@@ -504,6 +635,7 @@ module IBMWatson
|
|
504
635
|
require_relative("./websocket/speech_to_text_websocket_listener.rb")
|
505
636
|
headers = {}
|
506
637
|
headers = conn.default_options.headers.to_hash unless conn.default_options.headers.to_hash.empty?
|
638
|
+
@authenticator.authenticate(headers)
|
507
639
|
service_url = @service_url.gsub("https:", "wss:")
|
508
640
|
params = {
|
509
641
|
"model" => model,
|
@@ -532,7 +664,11 @@ module IBMWatson
|
|
532
664
|
"redaction" => redaction,
|
533
665
|
"processing_metrics" => processing_metrics,
|
534
666
|
"processing_metrics_interval" => processing_metrics_interval,
|
535
|
-
"audio_metrics" => audio_metrics
|
667
|
+
"audio_metrics" => audio_metrics,
|
668
|
+
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
669
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
670
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
671
|
+
"background_audio_suppression" => background_audio_suppression
|
536
672
|
}
|
537
673
|
options.delete_if { |_, v| v.nil? }
|
538
674
|
WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
|
@@ -577,7 +713,7 @@ module IBMWatson
|
|
577
713
|
# a one-hour span of time.
|
578
714
|
#
|
579
715
|
# **See also:** [Registering a callback
|
580
|
-
# URL](https://cloud.ibm.com/docs/
|
716
|
+
# URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#register).
|
581
717
|
# @param callback_url [String] An HTTP or HTTPS URL to which callback notifications are to be sent. To be
|
582
718
|
# white-listed, the URL must successfully echo the challenge string during URL
|
583
719
|
# verification. During verification, the client can also check the signature that
|
@@ -622,7 +758,7 @@ module IBMWatson
|
|
622
758
|
# URL can no longer be used with asynchronous recognition requests.
|
623
759
|
#
|
624
760
|
# **See also:** [Unregistering a callback
|
625
|
-
# URL](https://cloud.ibm.com/docs/
|
761
|
+
# URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#unregister).
|
626
762
|
# @param callback_url [String] The callback URL that is to be unregistered.
|
627
763
|
# @return [nil]
|
628
764
|
def unregister_callback(callback_url:)
|
@@ -650,7 +786,7 @@ module IBMWatson
|
|
650
786
|
end
|
651
787
|
|
652
788
|
##
|
653
|
-
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil)
|
789
|
+
# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
654
790
|
# Create a job.
|
655
791
|
# Creates a job for a new asynchronous recognition request. The job is owned by the
|
656
792
|
# instance of the service whose credentials are used to create it. How you learn the
|
@@ -690,7 +826,7 @@ module IBMWatson
|
|
690
826
|
# option to upload the file for the request.)
|
691
827
|
#
|
692
828
|
# **See also:** [Creating a
|
693
|
-
# job](https://cloud.ibm.com/docs/
|
829
|
+
# job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#create).
|
694
830
|
#
|
695
831
|
#
|
696
832
|
# ### Streaming mode
|
@@ -705,9 +841,9 @@ module IBMWatson
|
|
705
841
|
#
|
706
842
|
# **See also:**
|
707
843
|
# * [Audio
|
708
|
-
# transmission](https://cloud.ibm.com/docs/
|
844
|
+
# transmission](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#transmission)
|
709
845
|
# *
|
710
|
-
# [Timeouts](https://cloud.ibm.com/docs/
|
846
|
+
# [Timeouts](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts)
|
711
847
|
#
|
712
848
|
#
|
713
849
|
# ### Audio formats (content types)
|
@@ -747,13 +883,13 @@ module IBMWatson
|
|
747
883
|
# fails.
|
748
884
|
#
|
749
885
|
# **See also:** [Audio
|
750
|
-
# formats](https://cloud.ibm.com/docs/
|
886
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
751
887
|
# @param audio [String] The audio to transcribe.
|
752
888
|
# @param content_type [String] The format (MIME type) of the audio. For more information about specifying an
|
753
889
|
# audio format, see **Audio formats (content types)** in the method description.
|
754
890
|
# @param model [String] The identifier of the model that is to be used for the recognition request. See
|
755
891
|
# [Languages and
|
756
|
-
# models](https://cloud.ibm.com/docs/
|
892
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models).
|
757
893
|
# @param callback_url [String] A URL to which callback notifications are to be sent. The URL must already be
|
758
894
|
# successfully white-listed by using the **Register a callback** method. You can
|
759
895
|
# include the same callback URL with any number of job creation requests. Omit the
|
@@ -793,7 +929,7 @@ module IBMWatson
|
|
793
929
|
# match the model specified with the `model` parameter. You must make the request
|
794
930
|
# with credentials for the instance of the service that owns the custom model. By
|
795
931
|
# default, no custom language model is used. See [Custom
|
796
|
-
# models](https://cloud.ibm.com/docs/
|
932
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
797
933
|
#
|
798
934
|
#
|
799
935
|
# **Note:** Use this parameter instead of the deprecated `customization_id`
|
@@ -803,13 +939,13 @@ module IBMWatson
|
|
803
939
|
# match the model specified with the `model` parameter. You must make the request
|
804
940
|
# with credentials for the instance of the service that owns the custom model. By
|
805
941
|
# default, no custom acoustic model is used. See [Custom
|
806
|
-
# models](https://cloud.ibm.com/docs/
|
942
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
807
943
|
# @param base_model_version [String] The version of the specified base model that is to be used with the recognition
|
808
944
|
# request. Multiple versions of a base model can exist when a model is updated for
|
809
945
|
# internal improvements. The parameter is intended primarily for use with custom
|
810
946
|
# models that have been upgraded for a new base model. The default value depends on
|
811
947
|
# whether the parameter is used with or without a custom model. See [Base model
|
812
|
-
# version](https://cloud.ibm.com/docs/
|
948
|
+
# version](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#version).
|
813
949
|
# @param customization_weight [Float] If you specify the customization ID (GUID) of a custom language model with the
|
814
950
|
# recognition request, the customization weight tells the service how much weight to
|
815
951
|
# give to words from the custom language model compared to those from the base model
|
@@ -827,45 +963,51 @@ module IBMWatson
|
|
827
963
|
# phrases.
|
828
964
|
#
|
829
965
|
# See [Custom
|
830
|
-
# models](https://cloud.ibm.com/docs/
|
966
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#custom-input).
|
831
967
|
# @param inactivity_timeout [Fixnum] The time in seconds after which, if only silence (no speech) is detected in
|
832
968
|
# streaming audio, the connection is closed with a 400 error. The parameter is
|
833
969
|
# useful for stopping audio submission from a live microphone when a user simply
|
834
970
|
# walks away. Use `-1` for infinity. See [Inactivity
|
835
|
-
# timeout](https://cloud.ibm.com/docs/
|
971
|
+
# timeout](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts-inactivity).
|
836
972
|
# @param keywords [Array<String>] An array of keyword strings to spot in the audio. Each keyword string can include
|
837
973
|
# one or more string tokens. Keywords are spotted only in the final results, not in
|
838
974
|
# interim hypotheses. If you specify any keywords, you must also specify a keywords
|
839
|
-
# threshold.
|
840
|
-
#
|
841
|
-
#
|
975
|
+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
|
976
|
+
# keywords.
|
977
|
+
#
|
978
|
+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
|
979
|
+
# can have a maximum length of 1024 characters, though the maximum effective length
|
980
|
+
# for double-byte languages might be shorter. Keywords are case-insensitive.
|
981
|
+
#
|
982
|
+
# See [Keyword
|
983
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
842
984
|
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
|
843
985
|
# considered to match a keyword if its confidence is greater than or equal to the
|
844
986
|
# threshold. Specify a probability between 0.0 and 1.0. If you specify a threshold,
|
845
987
|
# you must also specify one or more keywords. The service performs no keyword
|
846
988
|
# spotting if you omit either parameter. See [Keyword
|
847
|
-
# spotting](https://cloud.ibm.com/docs/
|
989
|
+
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
|
848
990
|
# @param max_alternatives [Fixnum] The maximum number of alternative transcripts that the service is to return. By
|
849
991
|
# default, the service returns a single transcript. If you specify a value of `0`,
|
850
992
|
# the service uses the default value, `1`. See [Maximum
|
851
|
-
# alternatives](https://cloud.ibm.com/docs/
|
993
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#max_alternatives).
|
852
994
|
# @param word_alternatives_threshold [Float] A confidence value that is the lower bound for identifying a hypothesis as a
|
853
995
|
# possible word alternative (also known as "Confusion Networks"). An alternative
|
854
996
|
# word is considered if its confidence is greater than or equal to the threshold.
|
855
997
|
# Specify a probability between 0.0 and 1.0. By default, the service computes no
|
856
998
|
# alternative words. See [Word
|
857
|
-
# alternatives](https://cloud.ibm.com/docs/
|
999
|
+
# alternatives](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_alternatives).
|
858
1000
|
# @param word_confidence [Boolean] If `true`, the service returns a confidence measure in the range of 0.0 to 1.0 for
|
859
1001
|
# each word. By default, the service returns no word confidence scores. See [Word
|
860
|
-
# confidence](https://cloud.ibm.com/docs/
|
1002
|
+
# confidence](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_confidence).
|
861
1003
|
# @param timestamps [Boolean] If `true`, the service returns time alignment for each word. By default, no
|
862
1004
|
# timestamps are returned. See [Word
|
863
|
-
# timestamps](https://cloud.ibm.com/docs/
|
1005
|
+
# timestamps](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#word_timestamps).
|
864
1006
|
# @param profanity_filter [Boolean] If `true`, the service filters profanity from all output except for keyword
|
865
1007
|
# results by replacing inappropriate words with a series of asterisks. Set the
|
866
1008
|
# parameter to `false` to return results with no censoring. Applies to US English
|
867
1009
|
# transcription only. See [Profanity
|
868
|
-
# filtering](https://cloud.ibm.com/docs/
|
1010
|
+
# filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#profanity_filter).
|
869
1011
|
# @param smart_formatting [Boolean] If `true`, the service converts dates, times, series of digits and numbers, phone
|
870
1012
|
# numbers, currency values, and internet addresses into more readable, conventional
|
871
1013
|
# representations in the final transcript of a recognition request. For US English,
|
@@ -875,21 +1017,21 @@ module IBMWatson
|
|
875
1017
|
# **Note:** Applies to US English, Japanese, and Spanish transcription only.
|
876
1018
|
#
|
877
1019
|
# See [Smart
|
878
|
-
# formatting](https://cloud.ibm.com/docs/
|
1020
|
+
# formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#smart_formatting).
|
879
1021
|
# @param speaker_labels [Boolean] If `true`, the response includes labels that identify which words were spoken by
|
880
1022
|
# which participants in a multi-person exchange. By default, the service returns no
|
881
1023
|
# speaker labels. Setting `speaker_labels` to `true` forces the `timestamps`
|
882
1024
|
# parameter to be `true`, regardless of whether you specify `false` for the
|
883
1025
|
# parameter.
|
884
1026
|
#
|
885
|
-
# **Note:** Applies to US English, Japanese, and Spanish (both
|
886
|
-
# narrowband models) and UK English (narrowband model) transcription
|
887
|
-
# determine whether a language model supports speaker labels, you can also
|
888
|
-
# **Get a model** method and check that the attribute `speaker_labels` is
|
889
|
-
# `true`.
|
1027
|
+
# **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
|
1028
|
+
# broadband and narrowband models) and UK English (narrowband model) transcription
|
1029
|
+
# only. To determine whether a language model supports speaker labels, you can also
|
1030
|
+
# use the **Get a model** method and check that the attribute `speaker_labels` is
|
1031
|
+
# set to `true`.
|
890
1032
|
#
|
891
1033
|
# See [Speaker
|
892
|
-
# labels](https://cloud.ibm.com/docs/
|
1034
|
+
# labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
|
893
1035
|
# @param customization_id [String] **Deprecated.** Use the `language_customization_id` parameter to specify the
|
894
1036
|
# customization ID (GUID) of a custom language model that is to be used with the
|
895
1037
|
# recognition request. Do not specify both parameters with a request.
|
@@ -898,7 +1040,7 @@ module IBMWatson
|
|
898
1040
|
# specify the name of the custom language model for which the grammar is defined.
|
899
1041
|
# The service recognizes only strings that are recognized by the specified grammar;
|
900
1042
|
# it does not recognize other custom words from the model's words resource. See
|
901
|
-
# [Grammars](https://cloud.ibm.com/docs/
|
1043
|
+
# [Grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#grammars-input).
|
902
1044
|
# @param redaction [Boolean] If `true`, the service redacts, or masks, numeric data from final transcripts. The
|
903
1045
|
# feature redacts any number that has three or more consecutive digits by replacing
|
904
1046
|
# each digit with an `X` character. It is intended to redact sensitive numeric data,
|
@@ -913,12 +1055,15 @@ module IBMWatson
|
|
913
1055
|
# **Note:** Applies to US English, Japanese, and Korean transcription only.
|
914
1056
|
#
|
915
1057
|
# See [Numeric
|
916
|
-
# redaction](https://cloud.ibm.com/docs/
|
1058
|
+
# redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#redaction).
|
917
1059
|
# @param processing_metrics [Boolean] If `true`, requests processing metrics about the service's transcription of the
|
918
1060
|
# input audio. The service returns processing metrics at the interval specified by
|
919
1061
|
# the `processing_metrics_interval` parameter. It also returns processing metrics
|
920
1062
|
# for transcription events, for example, for final and interim results. By default,
|
921
1063
|
# the service returns no processing metrics.
|
1064
|
+
#
|
1065
|
+
# See [Processing
|
1066
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
|
922
1067
|
# @param processing_metrics_interval [Float] Specifies the interval in real wall-clock seconds at which the service is to
|
923
1068
|
# return processing metrics. The parameter is ignored unless the
|
924
1069
|
# `processing_metrics` parameter is set to `true`.
|
@@ -930,11 +1075,69 @@ module IBMWatson
|
|
930
1075
|
# metrics only for transcription events instead of at periodic intervals, set the
|
931
1076
|
# value to a large number. If the value is larger than the duration of the audio,
|
932
1077
|
# the service returns processing metrics only for transcription events.
|
1078
|
+
#
|
1079
|
+
# See [Processing
|
1080
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
|
933
1081
|
# @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
|
934
1082
|
# input audio. The service returns audio metrics with the final transcription
|
935
1083
|
# results. By default, the service returns no audio metrics.
|
1084
|
+
#
|
1085
|
+
# See [Audio
|
1086
|
+
# metrics](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
|
1087
|
+
# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
|
1088
|
+
# splits a transcript into multiple final results. If the service detects pauses or
|
1089
|
+
# extended silence before it reaches the end of the audio stream, its response can
|
1090
|
+
# include multiple final results. Silence indicates a point at which the speaker
|
1091
|
+
# pauses between spoken words or phrases.
|
1092
|
+
#
|
1093
|
+
# Specify a value for the pause interval in the range of 0.0 to 120.0.
|
1094
|
+
# * A value greater than 0 specifies the interval that the service is to use for
|
1095
|
+
# speech recognition.
|
1096
|
+
# * A value of 0 indicates that the service is to use the default interval. It is
|
1097
|
+
# equivalent to omitting the parameter.
|
1098
|
+
#
|
1099
|
+
# The default pause interval for most languages is 0.8 seconds; the default for
|
1100
|
+
# Chinese is 0.6 seconds.
|
1101
|
+
#
|
1102
|
+
# See [End of phrase silence
|
1103
|
+
# time](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#silence_time).
|
1104
|
+
# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
|
1105
|
+
# based on semantic features of the input, for example, at the conclusion of
|
1106
|
+
# meaningful phrases such as sentences. The service bases its understanding of
|
1107
|
+
# semantic features on the base language model that you use with a request. Custom
|
1108
|
+
# language models and grammars can also influence how and where the service splits a
|
1109
|
+
# transcript. By default, the service splits transcripts based solely on the pause
|
1110
|
+
# interval.
|
1111
|
+
#
|
1112
|
+
# See [Split transcript at phrase
|
1113
|
+
# end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
|
1114
|
+
# @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
|
1115
|
+
# the parameter to suppress word insertions from music, coughing, and other
|
1116
|
+
# non-speech events. The service biases the audio it passes for speech recognition
|
1117
|
+
# by evaluating the input audio against prior models of speech and non-speech
|
1118
|
+
# activity.
|
1119
|
+
#
|
1120
|
+
# Specify a value between 0.0 and 1.0:
|
1121
|
+
# * 0.0 suppresses all audio (no speech is transcribed).
|
1122
|
+
# * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
|
1123
|
+
# * 1.0 suppresses no audio (speech detection sensitivity is disabled).
|
1124
|
+
#
|
1125
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1126
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
1127
|
+
# @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
|
1128
|
+
# to prevent it from being transcribed as speech. Use the parameter to suppress side
|
1129
|
+
# conversations or background noise.
|
1130
|
+
#
|
1131
|
+
# Specify a value in the range of 0.0 to 1.0:
|
1132
|
+
# * 0.0 (the default) provides no suppression (background audio suppression is
|
1133
|
+
# disabled).
|
1134
|
+
# * 0.5 provides a reasonable level of audio suppression for general usage.
|
1135
|
+
# * 1.0 suppresses all audio (no audio is transcribed).
|
1136
|
+
#
|
1137
|
+
# The values increase on a monotonic curve. See [Speech Activity
|
1138
|
+
# Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
|
936
1139
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
937
|
-
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil)
|
1140
|
+
def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
|
938
1141
|
raise ArgumentError.new("audio must be provided") if audio.nil?
|
939
1142
|
|
940
1143
|
headers = {
|
@@ -969,7 +1172,11 @@ module IBMWatson
|
|
969
1172
|
"redaction" => redaction,
|
970
1173
|
"processing_metrics" => processing_metrics,
|
971
1174
|
"processing_metrics_interval" => processing_metrics_interval,
|
972
|
-
"audio_metrics" => audio_metrics
|
1175
|
+
"audio_metrics" => audio_metrics,
|
1176
|
+
"end_of_phrase_silence_time" => end_of_phrase_silence_time,
|
1177
|
+
"split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
|
1178
|
+
"speech_detector_sensitivity" => speech_detector_sensitivity,
|
1179
|
+
"background_audio_suppression" => background_audio_suppression
|
973
1180
|
}
|
974
1181
|
|
975
1182
|
data = audio
|
@@ -1000,7 +1207,7 @@ module IBMWatson
|
|
1000
1207
|
# first.
|
1001
1208
|
#
|
1002
1209
|
# **See also:** [Checking the status of the latest
|
1003
|
-
# jobs](https://cloud.ibm.com/docs/
|
1210
|
+
# jobs](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#jobs).
|
1004
1211
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1005
1212
|
def check_jobs
|
1006
1213
|
headers = {
|
@@ -1035,7 +1242,7 @@ module IBMWatson
|
|
1035
1242
|
# recent jobs associated with the calling credentials.
|
1036
1243
|
#
|
1037
1244
|
# **See also:** [Checking the status and retrieving the results of a
|
1038
|
-
# job](https://cloud.ibm.com/docs/
|
1245
|
+
# job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#job).
|
1039
1246
|
# @param id [String] The identifier of the asynchronous job that is to be used for the request. You
|
1040
1247
|
# must make the request with credentials for the instance of the service that owns
|
1041
1248
|
# the job.
|
@@ -1069,7 +1276,7 @@ module IBMWatson
|
|
1069
1276
|
# owns a job to delete it.
|
1070
1277
|
#
|
1071
1278
|
# **See also:** [Deleting a
|
1072
|
-
# job](https://cloud.ibm.com/docs/
|
1279
|
+
# job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#delete-async).
|
1073
1280
|
# @param id [String] The identifier of the asynchronous job that is to be used for the request. You
|
1074
1281
|
# must make the request with credentials for the instance of the service that owns
|
1075
1282
|
# the job.
|
@@ -1104,13 +1311,13 @@ module IBMWatson
|
|
1104
1311
|
# model is owned by the instance of the service whose credentials are used to create
|
1105
1312
|
# it.
|
1106
1313
|
#
|
1107
|
-
# You can create a maximum of 1024 custom language models
|
1108
|
-
# service returns an error if you attempt to create more than 1024 models. You
|
1109
|
-
# not lose any models, but you cannot create any more until your model count is
|
1314
|
+
# You can create a maximum of 1024 custom language models per owning credentials.
|
1315
|
+
# The service returns an error if you attempt to create more than 1024 models. You
|
1316
|
+
# do not lose any models, but you cannot create any more until your model count is
|
1110
1317
|
# below the limit.
|
1111
1318
|
#
|
1112
1319
|
# **See also:** [Create a custom language
|
1113
|
-
# model](https://cloud.ibm.com/docs/
|
1320
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#createModel-language).
|
1114
1321
|
# @param name [String] A user-defined name for the new custom language model. Use a name that is unique
|
1115
1322
|
# among all custom language models that you own. Use a localized name that matches
|
1116
1323
|
# the language of the custom model. Use a name that describes the domain of the
|
@@ -1122,7 +1329,7 @@ module IBMWatson
|
|
1122
1329
|
# To determine whether a base model supports language model customization, use the
|
1123
1330
|
# **Get a model** method and check that the attribute `custom_language_model` is set
|
1124
1331
|
# to `true`. You can also refer to [Language support for
|
1125
|
-
# customization](https://cloud.ibm.com/docs/
|
1332
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
1126
1333
|
# @param dialect [String] The dialect of the specified language that is to be used with the custom language
|
1127
1334
|
# model. For most languages, the dialect matches the language of the base model by
|
1128
1335
|
# default. For example, `en-US` is used for either of the US English language
|
@@ -1184,7 +1391,7 @@ module IBMWatson
|
|
1184
1391
|
# a model to list information about it.
|
1185
1392
|
#
|
1186
1393
|
# **See also:** [Listing custom language
|
1187
|
-
# models](https://cloud.ibm.com/docs/
|
1394
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
|
1188
1395
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
1189
1396
|
# are to be returned (for example, `en-US`). Omit the parameter to see all custom
|
1190
1397
|
# language or custom acoustic models that are owned by the requesting credentials.
|
@@ -1218,7 +1425,7 @@ module IBMWatson
|
|
1218
1425
|
# for the instance of the service that owns a model to list information about it.
|
1219
1426
|
#
|
1220
1427
|
# **See also:** [Listing custom language
|
1221
|
-
# models](https://cloud.ibm.com/docs/
|
1428
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language).
|
1222
1429
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1223
1430
|
# the request. You must make the request with credentials for the instance of the
|
1224
1431
|
# service that owns the custom model.
|
@@ -1251,7 +1458,7 @@ module IBMWatson
|
|
1251
1458
|
# owns a model to delete it.
|
1252
1459
|
#
|
1253
1460
|
# **See also:** [Deleting a custom language
|
1254
|
-
# model](https://cloud.ibm.com/docs/
|
1461
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#deleteModel-language).
|
1255
1462
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1256
1463
|
# the request. You must make the request with credentials for the instance of the
|
1257
1464
|
# service that owns the custom model.
|
@@ -1299,7 +1506,7 @@ module IBMWatson
|
|
1299
1506
|
# requests to add new resources until the existing request completes.
|
1300
1507
|
#
|
1301
1508
|
# **See also:** [Train the custom language
|
1302
|
-
# model](https://cloud.ibm.com/docs/
|
1509
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#trainModel-language).
|
1303
1510
|
#
|
1304
1511
|
#
|
1305
1512
|
# ### Training failures
|
@@ -1375,7 +1582,7 @@ module IBMWatson
|
|
1375
1582
|
# it.
|
1376
1583
|
#
|
1377
1584
|
# **See also:** [Resetting a custom language
|
1378
|
-
# model](https://cloud.ibm.com/docs/
|
1585
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#resetModel-language).
|
1379
1586
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1380
1587
|
# the request. You must make the request with credentials for the instance of the
|
1381
1588
|
# service that owns the custom model.
|
@@ -1419,7 +1626,7 @@ module IBMWatson
|
|
1419
1626
|
# subsequent requests for the model until the upgrade completes.
|
1420
1627
|
#
|
1421
1628
|
# **See also:** [Upgrading a custom language
|
1422
|
-
# model](https://cloud.ibm.com/docs/
|
1629
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeLanguage).
|
1423
1630
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1424
1631
|
# the request. You must make the request with credentials for the instance of the
|
1425
1632
|
# service that owns the custom model.
|
@@ -1455,7 +1662,7 @@ module IBMWatson
|
|
1455
1662
|
# that owns a model to list its corpora.
|
1456
1663
|
#
|
1457
1664
|
# **See also:** [Listing corpora for a custom language
|
1458
|
-
# model](https://cloud.ibm.com/docs/
|
1665
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#listCorpora).
|
1459
1666
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1460
1667
|
# the request. You must make the request with credentials for the instance of the
|
1461
1668
|
# service that owns the custom model.
|
@@ -1495,18 +1702,20 @@ module IBMWatson
|
|
1495
1702
|
#
|
1496
1703
|
# The call returns an HTTP 201 response code if the corpus is valid. The service
|
1497
1704
|
# then asynchronously processes the contents of the corpus and automatically
|
1498
|
-
# extracts new words that it finds. This can take on the order of
|
1499
|
-
# complete depending on the total number of words and the number of new words in
|
1500
|
-
# corpus, as well as the current load on the service. You cannot submit requests
|
1501
|
-
# add additional resources to the custom model or to train the model until the
|
1705
|
+
# extracts new words that it finds. This operation can take on the order of minutes
|
1706
|
+
# to complete depending on the total number of words and the number of new words in
|
1707
|
+
# the corpus, as well as the current load on the service. You cannot submit requests
|
1708
|
+
# to add additional resources to the custom model or to train the model until the
|
1502
1709
|
# service's analysis of the corpus for the current request completes. Use the **List
|
1503
1710
|
# a corpus** method to check the status of the analysis.
|
1504
1711
|
#
|
1505
1712
|
# The service auto-populates the model's words resource with words from the corpus
|
1506
|
-
# that are not found in its base vocabulary. These are referred to as
|
1507
|
-
# out-of-vocabulary (OOV) words.
|
1508
|
-
#
|
1509
|
-
#
|
1713
|
+
# that are not found in its base vocabulary. These words are referred to as
|
1714
|
+
# out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
|
1715
|
+
# resource to ensure that each OOV word's definition is complete and valid. You can
|
1716
|
+
# use the **List custom words** method to examine the words resource. You can use
|
1717
|
+
# other words methods to eliminate typos and modify how words are pronounced as
|
1718
|
+
# needed.
|
1510
1719
|
#
|
1511
1720
|
# To add a corpus file that has the same name as an existing corpus, set the
|
1512
1721
|
# `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
|
@@ -1523,10 +1732,12 @@ module IBMWatson
|
|
1523
1732
|
# directly.
|
1524
1733
|
#
|
1525
1734
|
# **See also:**
|
1526
|
-
# * [Working with
|
1527
|
-
# corpora](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
|
1528
1735
|
# * [Add a corpus to the custom language
|
1529
|
-
# model](https://cloud.ibm.com/docs/
|
1736
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
|
1737
|
+
# * [Working with
|
1738
|
+
# corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
|
1739
|
+
# * [Validating a words
|
1740
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1530
1741
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1531
1742
|
# the request. You must make the request with credentials for the instance of the
|
1532
1743
|
# service that owns the custom model.
|
@@ -1552,7 +1763,7 @@ module IBMWatson
|
|
1552
1763
|
# Make sure that you know the character encoding of the file. You must use that
|
1553
1764
|
# encoding when working with the words in the custom language model. For more
|
1554
1765
|
# information, see [Character
|
1555
|
-
# encoding](https://cloud.ibm.com/docs/
|
1766
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
1556
1767
|
#
|
1557
1768
|
#
|
1558
1769
|
# With the `curl` command, use the `--data-binary` option to upload the file for the
|
@@ -1606,7 +1817,7 @@ module IBMWatson
|
|
1606
1817
|
# that owns a model to list its corpora.
|
1607
1818
|
#
|
1608
1819
|
# **See also:** [Listing corpora for a custom language
|
1609
|
-
# model](https://cloud.ibm.com/docs/
|
1820
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#listCorpora).
|
1610
1821
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1611
1822
|
# the request. You must make the request with credentials for the instance of the
|
1612
1823
|
# service that owns the custom model.
|
@@ -1646,7 +1857,7 @@ module IBMWatson
|
|
1646
1857
|
# corpora.
|
1647
1858
|
#
|
1648
1859
|
# **See also:** [Deleting a corpus from a custom language
|
1649
|
-
# model](https://cloud.ibm.com/docs/
|
1860
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#deleteCorpus).
|
1650
1861
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1651
1862
|
# the request. You must make the request with credentials for the instance of the
|
1652
1863
|
# service that owns the custom model.
|
@@ -1688,7 +1899,7 @@ module IBMWatson
|
|
1688
1899
|
# service that owns a model to list information about its words.
|
1689
1900
|
#
|
1690
1901
|
# **See also:** [Listing words from a custom language
|
1691
|
-
# model](https://cloud.ibm.com/docs/
|
1902
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#listWords).
|
1692
1903
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1693
1904
|
# the request. You must make the request with credentials for the instance of the
|
1694
1905
|
# service that owns the custom model.
|
@@ -1755,7 +1966,10 @@ module IBMWatson
|
|
1755
1966
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1756
1967
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1757
1968
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1758
|
-
# word.
|
1969
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
1970
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
1971
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
1972
|
+
# valid.
|
1759
1973
|
# * The `display_as` field provides a different way of spelling the word in a
|
1760
1974
|
# transcript. Use the parameter when you want the word to appear different from its
|
1761
1975
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1785,10 +1999,12 @@ module IBMWatson
|
|
1785
1999
|
#
|
1786
2000
|
#
|
1787
2001
|
# **See also:**
|
1788
|
-
# * [Working with custom
|
1789
|
-
# words](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1790
2002
|
# * [Add words to the custom language
|
1791
|
-
# model](https://cloud.ibm.com/docs/
|
2003
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
2004
|
+
# * [Working with custom
|
2005
|
+
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
2006
|
+
# * [Validating a words
|
2007
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1792
2008
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1793
2009
|
# the request. You must make the request with credentials for the instance of the
|
1794
2010
|
# service that owns the custom model.
|
@@ -1844,7 +2060,10 @@ module IBMWatson
|
|
1844
2060
|
# the parameter for words that are difficult to pronounce, foreign words, acronyms,
|
1845
2061
|
# and so on. For example, you might specify that the word `IEEE` can sound like `i
|
1846
2062
|
# triple e`. You can specify a maximum of five sounds-like pronunciations for a
|
1847
|
-
# word.
|
2063
|
+
# word. If you omit the `sounds_like` field, the service attempts to set the field
|
2064
|
+
# to its pronunciation of the word. It cannot generate a pronunciation for all
|
2065
|
+
# words, so you must review the word's definition to ensure that it is complete and
|
2066
|
+
# valid.
|
1848
2067
|
# * The `display_as` field provides a different way of spelling the word in a
|
1849
2068
|
# transcript. Use the parameter when you want the word to appear different from its
|
1850
2069
|
# usual representation or from its spelling in training data. For example, you might
|
@@ -1856,10 +2075,12 @@ module IBMWatson
|
|
1856
2075
|
# the **List a custom word** method to review the word that you add.
|
1857
2076
|
#
|
1858
2077
|
# **See also:**
|
1859
|
-
# * [Working with custom
|
1860
|
-
# words](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1861
2078
|
# * [Add words to the custom language
|
1862
|
-
# model](https://cloud.ibm.com/docs/
|
2079
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
2080
|
+
# * [Working with custom
|
2081
|
+
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
2082
|
+
# * [Validating a words
|
2083
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1863
2084
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1864
2085
|
# the request. You must make the request with credentials for the instance of the
|
1865
2086
|
# service that owns the custom model.
|
@@ -1867,7 +2088,7 @@ module IBMWatson
|
|
1867
2088
|
# not include spaces in the word. Use a `-` (dash) or `_` (underscore) to connect
|
1868
2089
|
# the tokens of compound words. URL-encode the word if it includes non-ASCII
|
1869
2090
|
# characters. For more information, see [Character
|
1870
|
-
# encoding](https://cloud.ibm.com/docs/
|
2091
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
1871
2092
|
# @param word [String] For the **Add custom words** method, you must specify the custom word that is to
|
1872
2093
|
# be added to or updated in the custom model. Do not include spaces in the word. Use
|
1873
2094
|
# a `-` (dash) or `_` (underscore) to connect the tokens of compound words.
|
@@ -1925,13 +2146,13 @@ module IBMWatson
|
|
1925
2146
|
# about its words.
|
1926
2147
|
#
|
1927
2148
|
# **See also:** [Listing words from a custom language
|
1928
|
-
# model](https://cloud.ibm.com/docs/
|
2149
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#listWords).
|
1929
2150
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1930
2151
|
# the request. You must make the request with credentials for the instance of the
|
1931
2152
|
# service that owns the custom model.
|
1932
2153
|
# @param word_name [String] The custom word that is to be read from the custom language model. URL-encode the
|
1933
2154
|
# word if it includes non-ASCII characters. For more information, see [Character
|
1934
|
-
# encoding](https://cloud.ibm.com/docs/
|
2155
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
1935
2156
|
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
1936
2157
|
def get_word(customization_id:, word_name:)
|
1937
2158
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -1966,13 +2187,13 @@ module IBMWatson
|
|
1966
2187
|
# instance of the service that owns a model to delete its words.
|
1967
2188
|
#
|
1968
2189
|
# **See also:** [Deleting a word from a custom language
|
1969
|
-
# model](https://cloud.ibm.com/docs/
|
2190
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#deleteWord).
|
1970
2191
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1971
2192
|
# the request. You must make the request with credentials for the instance of the
|
1972
2193
|
# service that owns the custom model.
|
1973
2194
|
# @param word_name [String] The custom word that is to be deleted from the custom language model. URL-encode
|
1974
2195
|
# the word if it includes non-ASCII characters. For more information, see [Character
|
1975
|
-
# encoding](https://cloud.ibm.com/docs/
|
2196
|
+
# encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
|
1976
2197
|
# @return [nil]
|
1977
2198
|
def delete_word(customization_id:, word_name:)
|
1978
2199
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2007,7 +2228,7 @@ module IBMWatson
|
|
2007
2228
|
# model to list its grammars.
|
2008
2229
|
#
|
2009
2230
|
# **See also:** [Listing grammars from a custom language
|
2010
|
-
# model](https://cloud.ibm.com/docs/
|
2231
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#listGrammars).
|
2011
2232
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2012
2233
|
# the request. You must make the request with credentials for the instance of the
|
2013
2234
|
# service that owns the custom model.
|
@@ -2043,12 +2264,12 @@ module IBMWatson
|
|
2043
2264
|
#
|
2044
2265
|
# The call returns an HTTP 201 response code if the grammar is valid. The service
|
2045
2266
|
# then asynchronously processes the contents of the grammar and automatically
|
2046
|
-
# extracts new words that it finds. This can take a few seconds
|
2047
|
-
# depending on the size and complexity of the grammar, as well as the
|
2048
|
-
# on the service. You cannot submit requests to add additional
|
2049
|
-
# custom model or to train the model until the service's analysis
|
2050
|
-
# the current request completes. Use the **Get a grammar** method
|
2051
|
-
# status of the analysis.
|
2267
|
+
# extracts new words that it finds. This operation can take a few seconds or minutes
|
2268
|
+
# to complete depending on the size and complexity of the grammar, as well as the
|
2269
|
+
# current load on the service. You cannot submit requests to add additional
|
2270
|
+
# resources to the custom model or to train the model until the service's analysis
|
2271
|
+
# of the grammar for the current request completes. Use the **Get a grammar** method
|
2272
|
+
# to check the status of the analysis.
|
2052
2273
|
#
|
2053
2274
|
# The service populates the model's words resource with any word that is recognized
|
2054
2275
|
# by the grammar that is not found in the model's base vocabulary. These are
|
@@ -2071,9 +2292,9 @@ module IBMWatson
|
|
2071
2292
|
#
|
2072
2293
|
# **See also:**
|
2073
2294
|
# * [Understanding
|
2074
|
-
# grammars](https://cloud.ibm.com/docs/
|
2295
|
+
# grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUnderstand#grammarUnderstand)
|
2075
2296
|
# * [Add a grammar to the custom language
|
2076
|
-
# model](https://cloud.ibm.com/docs/
|
2297
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarAdd#addGrammar).
|
2077
2298
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2078
2299
|
# the request. You must make the request with credentials for the instance of the
|
2079
2300
|
# service that owns the custom model.
|
@@ -2153,7 +2374,7 @@ module IBMWatson
|
|
2153
2374
|
# model to list its grammars.
|
2154
2375
|
#
|
2155
2376
|
# **See also:** [Listing grammars from a custom language
|
2156
|
-
# model](https://cloud.ibm.com/docs/
|
2377
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#listGrammars).
|
2157
2378
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2158
2379
|
# the request. You must make the request with credentials for the instance of the
|
2159
2380
|
# service that owns the custom model.
|
@@ -2192,7 +2413,7 @@ module IBMWatson
|
|
2192
2413
|
# for the instance of the service that owns a model to delete its grammar.
|
2193
2414
|
#
|
2194
2415
|
# **See also:** [Deleting a grammar from a custom language
|
2195
|
-
# model](https://cloud.ibm.com/docs/
|
2416
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#deleteGrammar).
|
2196
2417
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
2197
2418
|
# the request. You must make the request with credentials for the instance of the
|
2198
2419
|
# service that owns the custom model.
|
@@ -2230,13 +2451,13 @@ module IBMWatson
|
|
2230
2451
|
# model is owned by the instance of the service whose credentials are used to create
|
2231
2452
|
# it.
|
2232
2453
|
#
|
2233
|
-
# You can create a maximum of 1024 custom acoustic models
|
2234
|
-
# service returns an error if you attempt to create more than 1024 models. You
|
2235
|
-
# not lose any models, but you cannot create any more until your model count is
|
2454
|
+
# You can create a maximum of 1024 custom acoustic models per owning credentials.
|
2455
|
+
# The service returns an error if you attempt to create more than 1024 models. You
|
2456
|
+
# do not lose any models, but you cannot create any more until your model count is
|
2236
2457
|
# below the limit.
|
2237
2458
|
#
|
2238
2459
|
# **See also:** [Create a custom acoustic
|
2239
|
-
# model](https://cloud.ibm.com/docs/
|
2460
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#createModel-acoustic).
|
2240
2461
|
# @param name [String] A user-defined name for the new custom acoustic model. Use a name that is unique
|
2241
2462
|
# among all custom acoustic models that you own. Use a localized name that matches
|
2242
2463
|
# the language of the custom model. Use a name that describes the acoustic
|
@@ -2248,7 +2469,7 @@ module IBMWatson
|
|
2248
2469
|
#
|
2249
2470
|
# To determine whether a base model supports acoustic model customization, refer to
|
2250
2471
|
# [Language support for
|
2251
|
-
# customization](https://cloud.ibm.com/docs/
|
2472
|
+
# customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customization#languageSupport).
|
2252
2473
|
# @param description [String] A description of the new custom acoustic model. Use a localized description that
|
2253
2474
|
# matches the language of the custom model.
|
2254
2475
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
@@ -2290,7 +2511,7 @@ module IBMWatson
|
|
2290
2511
|
# a model to list information about it.
|
2291
2512
|
#
|
2292
2513
|
# **See also:** [Listing custom acoustic
|
2293
|
-
# models](https://cloud.ibm.com/docs/
|
2514
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
|
2294
2515
|
# @param language [String] The identifier of the language for which custom language or custom acoustic models
|
2295
2516
|
# are to be returned (for example, `en-US`). Omit the parameter to see all custom
|
2296
2517
|
# language or custom acoustic models that are owned by the requesting credentials.
|
@@ -2324,7 +2545,7 @@ module IBMWatson
|
|
2324
2545
|
# for the instance of the service that owns a model to list information about it.
|
2325
2546
|
#
|
2326
2547
|
# **See also:** [Listing custom acoustic
|
2327
|
-
# models](https://cloud.ibm.com/docs/
|
2548
|
+
# models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic).
|
2328
2549
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2329
2550
|
# the request. You must make the request with credentials for the instance of the
|
2330
2551
|
# service that owns the custom model.
|
@@ -2357,7 +2578,7 @@ module IBMWatson
|
|
2357
2578
|
# model to delete it.
|
2358
2579
|
#
|
2359
2580
|
# **See also:** [Deleting a custom acoustic
|
2360
|
-
# model](https://cloud.ibm.com/docs/
|
2581
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#deleteModel-acoustic).
|
2361
2582
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2362
2583
|
# the request. You must make the request with credentials for the instance of the
|
2363
2584
|
# service that owns the custom model.
|
@@ -2395,7 +2616,7 @@ module IBMWatson
|
|
2395
2616
|
# to complete depending on the total amount of audio data on which the custom
|
2396
2617
|
# acoustic model is being trained and the current load on the service. Typically,
|
2397
2618
|
# training a custom acoustic model takes approximately two to four times the length
|
2398
|
-
# of its audio data. The
|
2619
|
+
# of its audio data. The actual time depends on the model being trained and the
|
2399
2620
|
# nature of the audio, such as whether the audio is clean or noisy. The method
|
2400
2621
|
# returns an HTTP 200 response code to indicate that the training process has begun.
|
2401
2622
|
#
|
@@ -2414,14 +2635,15 @@ module IBMWatson
|
|
2414
2635
|
# Train with a custom language model if you have verbatim transcriptions of the
|
2415
2636
|
# audio files that you have added to the custom model or you have either corpora
|
2416
2637
|
# (text files) or a list of words that are relevant to the contents of the audio
|
2417
|
-
# files.
|
2418
|
-
# base model
|
2638
|
+
# files. For training to succeed, both of the custom models must be based on the
|
2639
|
+
# same version of the same base model, and the custom language model must be fully
|
2640
|
+
# trained and available.
|
2419
2641
|
#
|
2420
2642
|
# **See also:**
|
2421
2643
|
# * [Train the custom acoustic
|
2422
|
-
# model](https://cloud.ibm.com/docs/
|
2644
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#trainModel-acoustic)
|
2423
2645
|
# * [Using custom acoustic and custom language models
|
2424
|
-
# together](https://cloud.ibm.com/docs/
|
2646
|
+
# together](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-useBoth#useBoth)
|
2425
2647
|
#
|
2426
2648
|
#
|
2427
2649
|
# ### Training failures
|
@@ -2431,6 +2653,9 @@ module IBMWatson
|
|
2431
2653
|
# another training request or a request to add audio resources to the model.
|
2432
2654
|
# * The custom model contains less than 10 minutes or more than 200 hours of audio
|
2433
2655
|
# data.
|
2656
|
+
# * You passed a custom language model with the `custom_language_model_id` query
|
2657
|
+
# parameter that is not in the available state. A custom language model must be
|
2658
|
+
# fully trained and available to be used to train a custom acoustic model.
|
2434
2659
|
# * You passed an incompatible custom language model with the
|
2435
2660
|
# `custom_language_model_id` query parameter. Both custom models must be based on
|
2436
2661
|
# the same version of the same base model.
|
@@ -2446,8 +2671,8 @@ module IBMWatson
|
|
2446
2671
|
# been trained with verbatim transcriptions of the audio resources or that contains
|
2447
2672
|
# words that are relevant to the contents of the audio resources. The custom
|
2448
2673
|
# language model must be based on the same version of the same base model as the
|
2449
|
-
# custom acoustic model
|
2450
|
-
# custom models.
|
2674
|
+
# custom acoustic model, and the custom language model must be fully trained and
|
2675
|
+
# available. The credentials specified with the request must own both custom models.
|
2451
2676
|
# @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
|
2452
2677
|
def train_acoustic_model(customization_id:, custom_language_model_id: nil)
|
2453
2678
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2486,7 +2711,7 @@ module IBMWatson
|
|
2486
2711
|
# owns a model to reset it.
|
2487
2712
|
#
|
2488
2713
|
# **See also:** [Resetting a custom acoustic
|
2489
|
-
# model](https://cloud.ibm.com/docs/
|
2714
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#resetModel-acoustic).
|
2490
2715
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2491
2716
|
# the request. You must make the request with credentials for the instance of the
|
2492
2717
|
# service that owns the custom model.
|
@@ -2539,20 +2764,21 @@ module IBMWatson
|
|
2539
2764
|
# acoustic model was not trained with a custom language model.
|
2540
2765
|
#
|
2541
2766
|
# **See also:** [Upgrading a custom acoustic
|
2542
|
-
# model](https://cloud.ibm.com/docs/
|
2767
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeAcoustic).
|
2543
2768
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2544
2769
|
# the request. You must make the request with credentials for the instance of the
|
2545
2770
|
# service that owns the custom model.
|
2546
2771
|
# @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
|
2547
2772
|
# customization ID (GUID) of that custom language model. The custom language model
|
2548
|
-
# must be upgraded before the custom acoustic model can be upgraded. The
|
2549
|
-
#
|
2773
|
+
# must be upgraded before the custom acoustic model can be upgraded. The custom
|
2774
|
+
# language model must be fully trained and available. The credentials specified with
|
2775
|
+
# the request must own both custom models.
|
2550
2776
|
# @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
|
2551
2777
|
# has been modified since it was last trained. Use this parameter only to force the
|
2552
2778
|
# upgrade of a custom acoustic model that is trained with a custom language model,
|
2553
2779
|
# and only if you receive a 400 response code and the message `No input data
|
2554
2780
|
# modified since last training`. See [Upgrading a custom acoustic
|
2555
|
-
# model](https://cloud.ibm.com/docs/
|
2781
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeAcoustic).
|
2556
2782
|
# @return [nil]
|
2557
2783
|
def upgrade_acoustic_model(customization_id:, custom_language_model_id: nil, force: nil)
|
2558
2784
|
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
|
@@ -2593,7 +2819,7 @@ module IBMWatson
|
|
2593
2819
|
# the instance of the service that owns a model to list its audio resources.
|
2594
2820
|
#
|
2595
2821
|
# **See also:** [Listing audio resources for a custom acoustic
|
2596
|
-
# model](https://cloud.ibm.com/docs/
|
2822
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#listAudio).
|
2597
2823
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2598
2824
|
# the request. You must make the request with credentials for the instance of the
|
2599
2825
|
# service that owns the custom model.
|
@@ -2641,14 +2867,14 @@ module IBMWatson
|
|
2641
2867
|
# same name as an existing audio resource, set the `allow_overwrite` parameter to
|
2642
2868
|
# `true`; otherwise, the request fails.
|
2643
2869
|
#
|
2644
|
-
# The method is asynchronous. It can take several seconds to complete
|
2645
|
-
# the duration of the audio and, in the case of an archive file, the
|
2646
|
-
# audio files being processed. The service returns a 201 response
|
2647
|
-
# is valid. It then asynchronously analyzes the contents of the
|
2648
|
-
# and automatically extracts information about the audio such as
|
2649
|
-
# sampling rate, and encoding. You cannot submit requests to train or
|
2650
|
-
# model until the service's analysis of all audio resources for current
|
2651
|
-
# completes.
|
2870
|
+
# The method is asynchronous. It can take several seconds or minutes to complete
|
2871
|
+
# depending on the duration of the audio and, in the case of an archive file, the
|
2872
|
+
# total number of audio files being processed. The service returns a 201 response
|
2873
|
+
# code if the audio is valid. It then asynchronously analyzes the contents of the
|
2874
|
+
# audio file or files and automatically extracts information about the audio such as
|
2875
|
+
# its length, sampling rate, and encoding. You cannot submit requests to train or
|
2876
|
+
# upgrade the model until the service's analysis of all audio resources for current
|
2877
|
+
# requests completes.
|
2652
2878
|
#
|
2653
2879
|
# To determine the status of the service's analysis of the audio, use the **Get an
|
2654
2880
|
# audio resource** method to poll the status of the audio. The method accepts the
|
@@ -2657,7 +2883,7 @@ module IBMWatson
|
|
2657
2883
|
# every few seconds until it becomes `ok`.
|
2658
2884
|
#
|
2659
2885
|
# **See also:** [Add audio to the custom acoustic
|
2660
|
-
# model](https://cloud.ibm.com/docs/
|
2886
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#addAudio).
|
2661
2887
|
#
|
2662
2888
|
#
|
2663
2889
|
# ### Content types for audio-type resources
|
@@ -2691,7 +2917,7 @@ module IBMWatson
|
|
2691
2917
|
# service labels the audio file as `invalid`.
|
2692
2918
|
#
|
2693
2919
|
# **See also:** [Audio
|
2694
|
-
# formats](https://cloud.ibm.com/docs/
|
2920
|
+
# formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats).
|
2695
2921
|
#
|
2696
2922
|
#
|
2697
2923
|
# ### Content types for archive-type resources
|
@@ -2827,7 +3053,7 @@ module IBMWatson
|
|
2827
3053
|
# its audio resources.
|
2828
3054
|
#
|
2829
3055
|
# **See also:** [Listing audio resources for a custom acoustic
|
2830
|
-
# model](https://cloud.ibm.com/docs/
|
3056
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#listAudio).
|
2831
3057
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2832
3058
|
# the request. You must make the request with credentials for the instance of the
|
2833
3059
|
# service that owns the custom model.
|
@@ -2868,7 +3094,7 @@ module IBMWatson
|
|
2868
3094
|
# service that owns a model to delete its audio resources.
|
2869
3095
|
#
|
2870
3096
|
# **See also:** [Deleting an audio resource from a custom acoustic
|
2871
|
-
# model](https://cloud.ibm.com/docs/
|
3097
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#deleteAudio).
|
2872
3098
|
# @param customization_id [String] The customization ID (GUID) of the custom acoustic model that is to be used for
|
2873
3099
|
# the request. You must make the request with credentials for the instance of the
|
2874
3100
|
# service that owns the custom model.
|
@@ -2911,7 +3137,7 @@ module IBMWatson
|
|
2911
3137
|
# with a request that passes the data.
|
2912
3138
|
#
|
2913
3139
|
# **See also:** [Information
|
2914
|
-
# security](https://cloud.ibm.com/docs/
|
3140
|
+
# security](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-information-security#information-security).
|
2915
3141
|
# @param customer_id [String] The customer ID for which all data is to be deleted.
|
2916
3142
|
# @return [nil]
|
2917
3143
|
def delete_user_data(customer_id:)
|