ibm_watson 1.3.1 → 1.4.0
- checksums.yaml +4 -4
- data/README.md +6 -2
- data/lib/ibm_watson/assistant_v1.rb +13 -5
- data/lib/ibm_watson/assistant_v2.rb +8 -2
- data/lib/ibm_watson/compare_comply_v1.rb +10 -4
- data/lib/ibm_watson/discovery_v1.rb +13 -7
- data/lib/ibm_watson/discovery_v2.rb +9 -3
- data/lib/ibm_watson/language_translator_v3.rb +25 -11
- data/lib/ibm_watson/natural_language_classifier_v1.rb +8 -2
- data/lib/ibm_watson/natural_language_understanding_v1.rb +14 -8
- data/lib/ibm_watson/personality_insights_v3.rb +8 -2
- data/lib/ibm_watson/speech_to_text_v1.rb +186 -65
- data/lib/ibm_watson/text_to_speech_v1.rb +26 -17
- data/lib/ibm_watson/tone_analyzer_v3.rb +8 -2
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +8 -2
- data/lib/ibm_watson/visual_recognition_v4.rb +8 -2
- data/test/integration/test_compare_comply_v1.rb +1 -12
- metadata +2 -2
data/lib/ibm_watson/personality_insights_v3.rb

@@ -47,6 +47,8 @@ module IBMWatson
   # The Personality Insights V3 service.
   class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
     include Concurrent::Async
+    DEFAULT_SERVICE_NAME = "personality_insights"
+    DEFAULT_SERVICE_URL = "https://gateway.watsonplatform.net/personality-insights/api"
     ##
     # @!method initialize(args)
     # Construct a new client for the Personality Insights service.
@@ -65,19 +67,23 @@ module IBMWatson
     # @option args service_url [String] The base service URL to use when contacting the service.
     # The base service_url may differ between IBM Cloud regions.
     # @option args authenticator [Object] The Authenticator instance to be configured for this service.
+    # @option args service_name [String] The name of the service to configure. Will be used as the key to load
+    # any external configuration, if applicable.
     def initialize(args = {})
       @__async_initialized__ = false
       defaults = {}
       defaults[:version] = nil
-      defaults[:service_url] = "https://gateway.watsonplatform.net/personality-insights/api"
+      defaults[:service_url] = DEFAULT_SERVICE_URL
+      defaults[:service_name] = DEFAULT_SERVICE_NAME
       defaults[:authenticator] = nil
+      user_service_url = args[:service_url] unless args[:service_url].nil?
       args = defaults.merge(args)
       @version = args[:version]
       raise ArgumentError.new("version must be provided") if @version.nil?

-      args[:service_name] = "personality_insights"
       args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
       super
+      @service_url = user_service_url unless user_service_url.nil?
     end

     #########################
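In short, the hard-coded service name and URL become overridable defaults, and a caller-supplied `service_url` is reasserted after `super` so external configuration cannot clobber it. A minimal construction sketch; the API key variable, version date, and regional URL are illustrative placeholders, and `IamAuthenticator` is assumed to be the IAM authenticator the SDK re-exports from the core library:

```ruby
require "ibm_watson"

# Placeholder credentials; IamAuthenticator is the core SDK's IAM authenticator.
authenticator = IBMWatson::Authenticators::IamAuthenticator.new(apikey: ENV["APIKEY"])

personality_insights = IBMWatson::PersonalityInsightsV3.new(
  version: "2017-10-13",
  authenticator: authenticator,
  # Overrides DEFAULT_SERVICE_URL and, via the user_service_url bookkeeping
  # added in 1.4.0, survives whatever `super` loads from external config.
  service_url: "https://gateway-lon.watsonplatform.net/personality-insights/api"
)
```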
data/lib/ibm_watson/speech_to_text_v1.rb

@@ -34,9 +34,9 @@
 # is a formal language specification that lets you restrict the phrases that the service
 # can recognize.
 #
-# Language model customization
-#
-#
+# Language model customization and acoustic model customization are generally available
+# for production use with all language models that are generally available. Grammars are
+# beta functionality for all language models that support language model customization.

 require "concurrent"
 require "erb"
@@ -50,6 +50,8 @@ module IBMWatson
   # The Speech to Text V1 service.
   class SpeechToTextV1 < IBMCloudSdkCore::BaseService
     include Concurrent::Async
+    DEFAULT_SERVICE_NAME = "speech_to_text"
+    DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"
     ##
     # @!method initialize(args)
     # Construct a new client for the Speech to Text service.
@@ -58,15 +60,19 @@ module IBMWatson
     # @option args service_url [String] The base service URL to use when contacting the service.
     # The base service_url may differ between IBM Cloud regions.
     # @option args authenticator [Object] The Authenticator instance to be configured for this service.
+    # @option args service_name [String] The name of the service to configure. Will be used as the key to load
+    # any external configuration, if applicable.
     def initialize(args = {})
       @__async_initialized__ = false
       defaults = {}
-      defaults[:service_url] = "https://stream.watsonplatform.net/speech-to-text/api"
+      defaults[:service_url] = DEFAULT_SERVICE_URL
+      defaults[:service_name] = DEFAULT_SERVICE_NAME
       defaults[:authenticator] = nil
+      user_service_url = args[:service_url] unless args[:service_url].nil?
       args = defaults.merge(args)
-      args[:service_name] = "speech_to_text"
       args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
       super
+      @service_url = user_service_url unless user_service_url.nil?
     end

     #########################
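Because the authenticator lookup is now keyed by the overridable `:service_name`, a process can point a client at a differently named credential set without passing an authenticator at all. A sketch, assuming the core SDK's convention of upper-snake-cased environment variables (or matching `ibm-credentials.env` entries); the name `my_stt` is hypothetical:

```ruby
require "ibm_watson"

# No :authenticator given, so ConfigBasedAuthenticatorFactory resolves
# credentials keyed by :service_name, e.g. MY_STT_APIKEY / MY_STT_AUTH_TYPE
# (assumed naming convention of the core SDK's external configuration).
speech_to_text = IBMWatson::SpeechToTextV1.new(service_name: "my_stt")
```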
@@ -135,7 +141,7 @@ module IBMWatson
     #########################

     ##
-    # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
     # Recognize audio.
     # Sends audio and returns transcription results for a recognition request. You can
     # pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
@@ -277,8 +283,14 @@ module IBMWatson
     # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
     # one or more string tokens. Keywords are spotted only in the final results, not in
     # interim hypotheses. If you specify any keywords, you must also specify a keywords
-    # threshold.
-    #
+    # threshold. Omit the parameter or specify an empty array if you do not need to spot
+    # keywords.
+    #
+    # You can spot a maximum of 1000 keywords with a single request. A single keyword
+    # can have a maximum length of 1024 characters, though the maximum effective length
+    # for double-byte languages might be shorter. Keywords are case-insensitive.
+    #
+    # See [Keyword
     # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
     # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
     # considered to match a keyword if its confidence is greater than or equal to the
@@ -323,11 +335,11 @@ module IBMWatson
     # parameter to be `true`, regardless of whether you specify `false` for the
     # parameter.
     #
-    # **Note:** Applies to US English, Japanese, and Spanish (both
-    # narrowband models) and UK English (narrowband model) transcription
-    # determine whether a language model supports speaker labels, you can also
-    # **Get a model** method and check that the attribute `speaker_labels` is
-    # `true`.
+    # **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
+    # broadband and narrowband models) and UK English (narrowband model) transcription
+    # only. To determine whether a language model supports speaker labels, you can also
+    # use the **Get a model** method and check that the attribute `speaker_labels` is
+    # set to `true`.
     #
     # See [Speaker
     # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -388,8 +400,33 @@ module IBMWatson
     #
     # See [Split transcript at phrase
     # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    # the parameter to suppress word insertions from music, coughing, and other
+    # non-speech events. The service biases the audio it passes for speech recognition
+    # by evaluating the input audio against prior models of speech and non-speech
+    # activity.
+    #
+    # Specify a value between 0.0 and 1.0:
+    # * 0.0 suppresses all audio (no speech is transcribed).
+    # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    # The values increase on a monotonic curve. See [Speech Activity
+    # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    # to prevent it from being transcribed as speech. Use the parameter to suppress side
+    # conversations or background noise.
+    #
+    # Specify a value in the range of 0.0 to 1.0:
+    # * 0.0 (the default) provides no suppression (background audio suppression is
+    # disabled).
+    # * 0.5 provides a reasonable level of audio suppression for general usage.
+    # * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    # The values increase on a monotonic curve. See [Speech Activity
+    # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
     # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-    def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
       raise ArgumentError.new("audio must be provided") if audio.nil?

       headers = {
@@ -420,7 +457,9 @@ module IBMWatson
         "redaction" => redaction,
         "audio_metrics" => audio_metrics,
         "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
       }

       data = audio
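Both new parameters travel as ordinary query parameters, as the hunk above shows. A hedged sketch of a synchronous `recognize` call combining them with keyword spotting; the file name, keywords, and values are illustrative, and the authenticator class is assumed as before:

```ruby
require "ibm_watson"

speech_to_text = IBMWatson::SpeechToTextV1.new(
  authenticator: IBMWatson::Authenticators::IamAuthenticator.new(apikey: ENV["APIKEY"])
)

File.open("audio-file.flac") do |audio_file|
  response = speech_to_text.recognize(
    audio: audio_file,
    content_type: "audio/flac",
    keywords: ["colorado", "tornado"],
    keywords_threshold: 0.5,
    speech_detector_sensitivity: 0.6,   # slightly more sensitive than the 0.5 default
    background_audio_suppression: 0.5   # moderate suppression; the default is 0.0
  )
  puts response.result["results"]
end
```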
@@ -439,7 +478,7 @@ module IBMWatson
     end

     ##
-    # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
     # Sends audio for speech recognition using web sockets.
     # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
     # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
@@ -531,6 +570,32 @@ module IBMWatson
     #
     # See [Split transcript at phrase
     # end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    # the parameter to suppress word insertions from music, coughing, and other
+    # non-speech events. The service biases the audio it passes for speech recognition
+    # by evaluating the input audio against prior models of speech and non-speech
+    # activity.
+    #
+    # Specify a value between 0.0 and 1.0:
+    # * 0.0 suppresses all audio (no speech is transcribed).
+    # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    # The values increase on a monotonic curve. See [Speech Activity
+    # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    # to prevent it from being transcribed as speech. Use the parameter to suppress side
+    # conversations or background noise.
+    #
+    # Specify a value in the range of 0.0 to 1.0:
+    # * 0.0 (the default) provides no suppression (background audio suppression is
+    # disabled).
+    # * 0.5 provides a reasonable level of audio suppression for general usage.
+    # * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    # The values increase on a monotonic curve. See [Speech Activity
+    # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
     def recognize_using_websocket(
       content_type: nil,
       recognize_callback:,
@@ -559,7 +624,9 @@ module IBMWatson
       processing_metrics_interval: nil,
       audio_metrics: nil,
       end_of_phrase_silence_time: nil,
-      split_transcript_at_phrase_end: nil
+      split_transcript_at_phrase_end: nil,
+      speech_detector_sensitivity: nil,
+      background_audio_suppression: nil
     )
       raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
       raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
@@ -599,7 +666,9 @@ module IBMWatson
         "processing_metrics_interval" => processing_metrics_interval,
         "audio_metrics" => audio_metrics,
         "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
       }
       options.delete_if { |_, v| v.nil? }
       WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
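The WebSocket path gains the same two knobs. A sketch, assuming the `RecognizeCallback` hook names (`on_transcription`, `on_error`) and the `start` call on the returned `WebSocketClient` behave as in the gem's examples; the audio file is a placeholder:

```ruby
require "ibm_watson"

# Minimal callback; the hook names are assumed from the SDK's examples.
class MyCallback < IBMWatson::RecognizeCallback
  def on_transcription(transcription)
    puts transcription
  end

  def on_error(error:)
    warn error
  end
end

speech_to_text = IBMWatson::SpeechToTextV1.new(
  authenticator: IBMWatson::Authenticators::IamAuthenticator.new(apikey: ENV["APIKEY"])
)

ws = speech_to_text.recognize_using_websocket(
  audio: File.open("audio-file.flac"),
  content_type: "audio/flac",
  recognize_callback: MyCallback.new,
  speech_detector_sensitivity: 0.7,
  background_audio_suppression: 0.3
)
ws.start # runs the WebSocket client (assumed entry point)
```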
@@ -717,7 +786,7 @@ module IBMWatson
     end

     ##
-    # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
     # Create a job.
     # Creates a job for a new asynchronous recognition request. The job is owned by the
     # instance of the service whose credentials are used to create it. How you learn the
@@ -903,8 +972,14 @@ module IBMWatson
     # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
     # one or more string tokens. Keywords are spotted only in the final results, not in
     # interim hypotheses. If you specify any keywords, you must also specify a keywords
-    # threshold.
-    #
+    # threshold. Omit the parameter or specify an empty array if you do not need to spot
+    # keywords.
+    #
+    # You can spot a maximum of 1000 keywords with a single request. A single keyword
+    # can have a maximum length of 1024 characters, though the maximum effective length
+    # for double-byte languages might be shorter. Keywords are case-insensitive.
+    #
+    # See [Keyword
     # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
     # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
     # considered to match a keyword if its confidence is greater than or equal to the
@@ -949,11 +1024,11 @@ module IBMWatson
     # parameter to be `true`, regardless of whether you specify `false` for the
     # parameter.
     #
-    # **Note:** Applies to US English, Japanese, and Spanish (both
-    # narrowband models) and UK English (narrowband model) transcription
-    # determine whether a language model supports speaker labels, you can also
-    # **Get a model** method and check that the attribute `speaker_labels` is
-    # `true`.
+    # **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
+    # broadband and narrowband models) and UK English (narrowband model) transcription
+    # only. To determine whether a language model supports speaker labels, you can also
+    # use the **Get a model** method and check that the attribute `speaker_labels` is
+    # set to `true`.
     #
     # See [Speaker
     # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -1036,8 +1111,33 @@ module IBMWatson
     #
     # See [Split transcript at phrase
     # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    # the parameter to suppress word insertions from music, coughing, and other
+    # non-speech events. The service biases the audio it passes for speech recognition
+    # by evaluating the input audio against prior models of speech and non-speech
+    # activity.
+    #
+    # Specify a value between 0.0 and 1.0:
+    # * 0.0 suppresses all audio (no speech is transcribed).
+    # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    # The values increase on a monotonic curve. See [Speech Activity
+    # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    # to prevent it from being transcribed as speech. Use the parameter to suppress side
+    # conversations or background noise.
+    #
+    # Specify a value in the range of 0.0 to 1.0:
+    # * 0.0 (the default) provides no suppression (background audio suppression is
+    # disabled).
+    # * 0.5 provides a reasonable level of audio suppression for general usage.
+    # * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    # The values increase on a monotonic curve. See [Speech Activity
+    # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
     # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-    def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
       raise ArgumentError.new("audio must be provided") if audio.nil?

       headers = {
@@ -1074,7 +1174,9 @@ module IBMWatson
         "processing_metrics_interval" => processing_metrics_interval,
         "audio_metrics" => audio_metrics,
         "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
       }

       data = audio
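The asynchronous path accepts the same pair. A polling sketch; the `check_job` call and its `id`/`status` fields follow the service's "Check a job" operation and are assumed here, and the file is a placeholder:

```ruby
require "ibm_watson"

speech_to_text = IBMWatson::SpeechToTextV1.new(
  authenticator: IBMWatson::Authenticators::IamAuthenticator.new(apikey: ENV["APIKEY"])
)

# Poll the job rather than registering a callback_url.
job = File.open("meeting.mp3") do |audio_file|
  speech_to_text.create_job(
    audio: audio_file,
    content_type: "audio/mp3",
    speech_detector_sensitivity: 0.5,
    background_audio_suppression: 0.5
  ).result
end

sleep 5 until speech_to_text.check_job(id: job["id"]).result["status"] == "completed"
```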
@@ -1600,18 +1702,20 @@ module IBMWatson
     #
     # The call returns an HTTP 201 response code if the corpus is valid. The service
     # then asynchronously processes the contents of the corpus and automatically
-    # extracts new words that it finds. This can take on the order of
-    # complete depending on the total number of words and the number of new words in
-    # corpus, as well as the current load on the service. You cannot submit requests
-    # add additional resources to the custom model or to train the model until the
+    # extracts new words that it finds. This operation can take on the order of minutes
+    # to complete depending on the total number of words and the number of new words in
+    # the corpus, as well as the current load on the service. You cannot submit requests
+    # to add additional resources to the custom model or to train the model until the
     # service's analysis of the corpus for the current request completes. Use the **List
     # a corpus** method to check the status of the analysis.
     #
     # The service auto-populates the model's words resource with words from the corpus
-    # that are not found in its base vocabulary. These are referred to as
-    # out-of-vocabulary (OOV) words.
-    #
-    #
+    # that are not found in its base vocabulary. These words are referred to as
+    # out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
+    # resource to ensure that each OOV word's definition is complete and valid. You can
+    # use the **List custom words** method to examine the words resource. You can use
+    # other words method calls to eliminate typos and modify how words are pronounced as
+    # needed.
     #
     # To add a corpus file that has the same name as an existing corpus, set the
     # `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
@@ -1628,10 +1732,12 @@ module IBMWatson
     # directly.
     #
     # **See also:**
+    # * [Add a corpus to the custom language
+    # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
     # * [Working with
     # corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
-    # * [
-    #
+    # * [Validating a words
+    # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
     # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
     # the request. You must make the request with credentials for the instance of the
     # service that owns the custom model.
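A sketch of the workflow the rewritten text describes: add a corpus, then inspect the auto-populated OOV words with **List custom words**. The `customization_id` and the corpus file are placeholders, and the signatures are assumed to match the gem's corpus and word methods:

```ruby
require "ibm_watson"

speech_to_text = IBMWatson::SpeechToTextV1.new(
  authenticator: IBMWatson::Authenticators::IamAuthenticator.new(apikey: ENV["APIKEY"])
)
customization_id = "your-custom-model-guid" # placeholder

# Add a corpus; the service then extracts OOV words asynchronously.
File.open("healthcare-terms.txt") do |corpus_file|
  speech_to_text.add_corpus(
    customization_id: customization_id,
    corpus_name: "healthcare",
    corpus_file: corpus_file
  )
end

# Review the words resource to validate each OOV word's definition.
words = speech_to_text.list_words(customization_id: customization_id).result
puts words["words"]
```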
@@ -1860,7 +1966,10 @@ module IBMWatson
     # the parameter for words that are difficult to pronounce, foreign words, acronyms,
     # and so on. For example, you might specify that the word `IEEE` can sound like `i
     # triple e`. You can specify a maximum of five sounds-like pronunciations for a
-    # word.
+    # word. If you omit the `sounds_like` field, the service attempts to set the field
+    # to its pronunciation of the word. It cannot generate a pronunciation for all
+    # words, so you must review the word's definition to ensure that it is complete and
+    # valid.
     # * The `display_as` field provides a different way of spelling the word in a
     # transcript. Use the parameter when you want the word to appear different from its
     # usual representation or from its spelling in training data. For example, you might
@@ -1890,10 +1999,12 @@ module IBMWatson
     #
     #
     # **See also:**
+    # * [Add words to the custom language
+    # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
     # * [Working with custom
     # words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
-    # * [
-    #
+    # * [Validating a words
+    # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
     # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
     # the request. You must make the request with credentials for the instance of the
     # service that owns the custom model.
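The `sounds_like` guidance in code: a hedged `add_word` sketch reusing the doc's own `IEEE` example (the model GUID is a placeholder, and the keyword names are assumed to match the gem's word methods):

```ruby
# Up to five sounds-like pronunciations per word; if sounds_like is
# omitted, the service attempts to generate a pronunciation itself.
speech_to_text.add_word(
  customization_id: "your-custom-model-guid", # placeholder
  word_name: "IEEE",
  sounds_like: ["i triple e"],
  display_as: "IEEE"
)
```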
@@ -1949,7 +2060,10 @@ module IBMWatson
     # the parameter for words that are difficult to pronounce, foreign words, acronyms,
     # and so on. For example, you might specify that the word `IEEE` can sound like `i
     # triple e`. You can specify a maximum of five sounds-like pronunciations for a
-    # word.
+    # word. If you omit the `sounds_like` field, the service attempts to set the field
+    # to its pronunciation of the word. It cannot generate a pronunciation for all
+    # words, so you must review the word's definition to ensure that it is complete and
+    # valid.
     # * The `display_as` field provides a different way of spelling the word in a
     # transcript. Use the parameter when you want the word to appear different from its
     # usual representation or from its spelling in training data. For example, you might
@@ -1961,10 +2075,12 @@ module IBMWatson
     # the **List a custom word** method to review the word that you add.
     #
     # **See also:**
+    # * [Add words to the custom language
+    # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
     # * [Working with custom
     # words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
-    # * [
-    #
+    # * [Validating a words
+    # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
     # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
     # the request. You must make the request with credentials for the instance of the
     # service that owns the custom model.
@@ -2148,12 +2264,12 @@ module IBMWatson
     #
     # The call returns an HTTP 201 response code if the grammar is valid. The service
     # then asynchronously processes the contents of the grammar and automatically
-    # extracts new words that it finds. This can take a few seconds
-    # depending on the size and complexity of the grammar, as well as the
-    # on the service. You cannot submit requests to add additional
-    # custom model or to train the model until the service's analysis
-    # the current request completes. Use the **Get a grammar** method
-    # status of the analysis.
+    # extracts new words that it finds. This operation can take a few seconds or minutes
+    # to complete depending on the size and complexity of the grammar, as well as the
+    # current load on the service. You cannot submit requests to add additional
+    # resources to the custom model or to train the model until the service's analysis
+    # of the grammar for the current request completes. Use the **Get a grammar** method
+    # to check the status of the analysis.
     #
     # The service populates the model's words resource with any word that is recognized
     # by the grammar that is not found in the model's base vocabulary. These are
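A sketch of that add-then-poll cycle for grammars, assuming the gem's `add_grammar`/`get_grammar` signatures; `application/srgs` is the media type for an ABNF grammar, and the names and file are placeholders:

```ruby
File.open("confirm.abnf") do |grammar_file|
  speech_to_text.add_grammar(
    customization_id: "your-custom-model-guid", # placeholder
    grammar_name: "confirm-abnf",
    grammar_file: grammar_file,
    content_type: "application/srgs"
  )
end

# Poll **Get a grammar** until the asynchronous analysis completes.
loop do
  grammar = speech_to_text.get_grammar(
    customization_id: "your-custom-model-guid", grammar_name: "confirm-abnf"
  ).result
  break if grammar["status"] == "analyzed"
  sleep 5
end
```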
@@ -2500,7 +2616,7 @@ module IBMWatson
     # to complete depending on the total amount of audio data on which the custom
     # acoustic model is being trained and the current load on the service. Typically,
     # training a custom acoustic model takes approximately two to four times the length
-    # of its audio data. The
+    # of its audio data. The actual time depends on the model being trained and the
     # nature of the audio, such as whether the audio is clean or noisy. The method
     # returns an HTTP 200 response code to indicate that the training process has begun.
     #
@@ -2519,8 +2635,9 @@ module IBMWatson
     # Train with a custom language model if you have verbatim transcriptions of the
     # audio files that you have added to the custom model or you have either corpora
     # (text files) or a list of words that are relevant to the contents of the audio
-    # files.
-    # base model
+    # files. For training to succeed, both of the custom models must be based on the
+    # same version of the same base model, and the custom language model must be fully
+    # trained and available.
     #
     # **See also:**
     # * [Train the custom acoustic
@@ -2536,6 +2653,9 @@ module IBMWatson
     # another training request or a request to add audio resources to the model.
     # * The custom model contains less than 10 minutes or more than 200 hours of audio
     # data.
+    # * You passed a custom language model with the `custom_language_model_id` query
+    # parameter that is not in the available state. A custom language model must be
+    # fully trained and available to be used to train a custom acoustic model.
     # * You passed an incompatible custom language model with the
     # `custom_language_model_id` query parameter. Both custom models must be based on
     # the same version of the same base model.
@@ -2551,8 +2671,8 @@ module IBMWatson
     # been trained with verbatim transcriptions of the audio resources or that contains
     # words that are relevant to the contents of the audio resources. The custom
     # language model must be based on the same version of the same base model as the
-    # custom acoustic model
-    # custom models.
+    # custom acoustic model, and the custom language model must be fully trained and
+    # available. The credentials specified with the request must own both custom models.
     # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
     def train_acoustic_model(customization_id:, custom_language_model_id: nil)
       raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
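`train_acoustic_model`'s signature appears verbatim in the hunk above. A sketch of the dual-model training call the clarified docs describe; both GUIDs are placeholders, and per the new text the language model must be fully trained, available, based on the same base-model version, and owned by the same credentials:

```ruby
speech_to_text.train_acoustic_model(
  customization_id: "acoustic-model-guid",          # placeholder
  custom_language_model_id: "language-model-guid"   # placeholder
)
```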
@@ -2650,8 +2770,9 @@ module IBMWatson
     # service that owns the custom model.
     # @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
     # customization ID (GUID) of that custom language model. The custom language model
-    # must be upgraded before the custom acoustic model can be upgraded. The
-    #
+    # must be upgraded before the custom acoustic model can be upgraded. The custom
+    # language model must be fully trained and available. The credentials specified with
+    # the request must own both custom models.
     # @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
     # has been modified since it was last trained. Use this parameter only to force the
     # upgrade of a custom acoustic model that is trained with a custom language model,
@@ -2746,14 +2867,14 @@ module IBMWatson
     # same name as an existing audio resource, set the `allow_overwrite` parameter to
     # `true`; otherwise, the request fails.
     #
-    # The method is asynchronous. It can take several seconds to complete
-    # the duration of the audio and, in the case of an archive file, the
-    # audio files being processed. The service returns a 201 response
-    # is valid. It then asynchronously analyzes the contents of the
-    # and automatically extracts information about the audio such as
-    # sampling rate, and encoding. You cannot submit requests to train or
-    # model until the service's analysis of all audio resources for current
-    # completes.
+    # The method is asynchronous. It can take several seconds or minutes to complete
+    # depending on the duration of the audio and, in the case of an archive file, the
+    # total number of audio files being processed. The service returns a 201 response
+    # code if the audio is valid. It then asynchronously analyzes the contents of the
+    # audio file or files and automatically extracts information about the audio such as
+    # its length, sampling rate, and encoding. You cannot submit requests to train or
+    # upgrade the model until the service's analysis of all audio resources for current
+    # requests completes.
     #
     # To determine the status of the service's analysis of the audio, use the **Get an
     # audio resource** method to poll the status of the audio. The method accepts the
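Finally, the add-audio-then-poll cycle from the rewritten paragraph, assuming the gem's `add_audio`/`get_audio` signatures and the service's `status` values ("ok" once analysis completes); the names and file are placeholders:

```ruby
File.open("first-sample.wav") do |audio_file|
  speech_to_text.add_audio(
    customization_id: "acoustic-model-guid", # placeholder
    audio_name: "first-sample",
    audio_resource: audio_file,
    content_type: "audio/wav"
  )
end

# Poll **Get an audio resource** until the service's analysis finishes.
loop do
  audio = speech_to_text.get_audio(
    customization_id: "acoustic-model-guid", audio_name: "first-sample"
  ).result
  break if audio["status"] == "ok" # assumed terminal status
  sleep 5
end
```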