ibm_watson 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -2
- data/lib/ibm_watson/assistant_v1.rb +13 -5
- data/lib/ibm_watson/assistant_v2.rb +8 -2
- data/lib/ibm_watson/compare_comply_v1.rb +10 -4
- data/lib/ibm_watson/discovery_v1.rb +13 -7
- data/lib/ibm_watson/discovery_v2.rb +9 -3
- data/lib/ibm_watson/language_translator_v3.rb +25 -11
- data/lib/ibm_watson/natural_language_classifier_v1.rb +8 -2
- data/lib/ibm_watson/natural_language_understanding_v1.rb +14 -8
- data/lib/ibm_watson/personality_insights_v3.rb +8 -2
- data/lib/ibm_watson/speech_to_text_v1.rb +186 -65
- data/lib/ibm_watson/text_to_speech_v1.rb +26 -17
- data/lib/ibm_watson/tone_analyzer_v3.rb +8 -2
- data/lib/ibm_watson/version.rb +1 -1
- data/lib/ibm_watson/visual_recognition_v3.rb +8 -2
- data/lib/ibm_watson/visual_recognition_v4.rb +8 -2
- data/test/integration/test_compare_comply_v1.rb +1 -12
- metadata +2 -2
data/lib/ibm_watson/personality_insights_v3.rb

@@ -47,6 +47,8 @@ module IBMWatson
   # The Personality Insights V3 service.
   class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
     include Concurrent::Async
+    DEFAULT_SERVICE_NAME = "personality_insights"
+    DEFAULT_SERVICE_URL = "https://gateway.watsonplatform.net/personality-insights/api"
     ##
     # @!method initialize(args)
     # Construct a new client for the Personality Insights service.
@@ -65,19 +67,23 @@ module IBMWatson
     # @option args service_url [String] The base service URL to use when contacting the service.
     #   The base service_url may differ between IBM Cloud regions.
     # @option args authenticator [Object] The Authenticator instance to be configured for this service.
+    # @option args service_name [String] The name of the service to configure. Will be used as the key to load
+    #   any external configuration, if applicable.
     def initialize(args = {})
       @__async_initialized__ = false
       defaults = {}
       defaults[:version] = nil
-      defaults[:service_url] = "https://gateway.watsonplatform.net/personality-insights/api"
+      defaults[:service_url] = DEFAULT_SERVICE_URL
+      defaults[:service_name] = DEFAULT_SERVICE_NAME
       defaults[:authenticator] = nil
+      user_service_url = args[:service_url] unless args[:service_url].nil?
       args = defaults.merge(args)
       @version = args[:version]
       raise ArgumentError.new("version must be provided") if @version.nil?

-      args[:service_name] = "personality_insights"
       args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
       super
+      @service_url = user_service_url unless user_service_url.nil?
     end

     #########################
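The net effect of this constructor change, which this release repeats across every service class, is that a caller-supplied `service_url` now survives the external-configuration lookup, and the service name used as the configuration key is overridable. A minimal usage sketch, with `"{apikey}"` as a credential placeholder:

```ruby
require "ibm_watson"

# "{apikey}" is a placeholder; substitute credentials for your own instance.
authenticator = IBMWatson::Authenticators::IamAuthenticator.new(apikey: "{apikey}")

personality_insights = IBMWatson::PersonalityInsightsV3.new(
  version: "2017-10-13",
  authenticator: authenticator,
  # New in 1.4.0: the key used to load any external configuration.
  service_name: "personality_insights",
  # An explicit URL now survives the external-config lookup.
  service_url: "https://gateway.watsonplatform.net/personality-insights/api"
)
```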
data/lib/ibm_watson/speech_to_text_v1.rb

@@ -34,9 +34,9 @@
 # is a formal language specification that lets you restrict the phrases that the service
 # can recognize.
 #
-# Language model customization
-#
-#
+# Language model customization and acoustic model customization are generally available
+# for production use with all language models that are generally available. Grammars are
+# beta functionality for all language models that support language model customization.

 require "concurrent"
 require "erb"
@@ -50,6 +50,8 @@ module IBMWatson
   # The Speech to Text V1 service.
   class SpeechToTextV1 < IBMCloudSdkCore::BaseService
     include Concurrent::Async
+    DEFAULT_SERVICE_NAME = "speech_to_text"
+    DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"
     ##
     # @!method initialize(args)
     # Construct a new client for the Speech to Text service.
@@ -58,15 +60,19 @@ module IBMWatson
     # @option args service_url [String] The base service URL to use when contacting the service.
     #   The base service_url may differ between IBM Cloud regions.
     # @option args authenticator [Object] The Authenticator instance to be configured for this service.
+    # @option args service_name [String] The name of the service to configure. Will be used as the key to load
+    #   any external configuration, if applicable.
     def initialize(args = {})
       @__async_initialized__ = false
       defaults = {}
-      defaults[:service_url] = "https://stream.watsonplatform.net/speech-to-text/api"
+      defaults[:service_url] = DEFAULT_SERVICE_URL
+      defaults[:service_name] = DEFAULT_SERVICE_NAME
       defaults[:authenticator] = nil
+      user_service_url = args[:service_url] unless args[:service_url].nil?
       args = defaults.merge(args)
-      args[:service_name] = "speech_to_text"
       args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
       super
+      @service_url = user_service_url unless user_service_url.nil?
     end

     #########################
@@ -135,7 +141,7 @@ module IBMWatson
     #########################

     ##
-    # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
     # Recognize audio.
     # Sends audio and returns transcription results for a recognition request. You can
     # pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
@@ -277,8 +283,14 @@ module IBMWatson
     # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
     #   one or more string tokens. Keywords are spotted only in the final results, not in
     #   interim hypotheses. If you specify any keywords, you must also specify a keywords
-    #   threshold.
-    #
+    #   threshold. Omit the parameter or specify an empty array if you do not need to spot
+    #   keywords.
+    #
+    #   You can spot a maximum of 1000 keywords with a single request. A single keyword
+    #   can have a maximum length of 1024 characters, though the maximum effective length
+    #   for double-byte languages might be shorter. Keywords are case-insensitive.
+    #
+    #   See [Keyword
     #   spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
     # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
     #   considered to match a keyword if its confidence is greater than or equal to the
@@ -323,11 +335,11 @@ module IBMWatson
     #   parameter to be `true`, regardless of whether you specify `false` for the
     #   parameter.
     #
-    #   **Note:** Applies to US English, Japanese, and Spanish (both
-    #   narrowband models) and UK English (narrowband model) transcription
-    #   determine whether a language model supports speaker labels, you can also
-    #   **Get a model** method and check that the attribute `speaker_labels` is
-    #   `true`.
+    #   **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
+    #   broadband and narrowband models) and UK English (narrowband model) transcription
+    #   only. To determine whether a language model supports speaker labels, you can also
+    #   use the **Get a model** method and check that the attribute `speaker_labels` is
+    #   set to `true`.
     #
     #   See [Speaker
     #   labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -388,8 +400,33 @@ module IBMWatson
     #
     #   See [Split transcript at phrase
     #   end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    #   the parameter to suppress word insertions from music, coughing, and other
+    #   non-speech events. The service biases the audio it passes for speech recognition
+    #   by evaluating the input audio against prior models of speech and non-speech
+    #   activity.
+    #
+    #   Specify a value between 0.0 and 1.0:
+    #   * 0.0 suppresses all audio (no speech is transcribed).
+    #   * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    #   * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    #   to prevent it from being transcribed as speech. Use the parameter to suppress side
+    #   conversations or background noise.
+    #
+    #   Specify a value in the range of 0.0 to 1.0:
+    #   * 0.0 (the default) provides no suppression (background audio suppression is
+    #   disabled).
+    #   * 0.5 provides a reasonable level of audio suppression for general usage.
+    #   * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
     # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-    def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
       raise ArgumentError.new("audio must be provided") if audio.nil?

       headers = {
@@ -420,7 +457,9 @@ module IBMWatson
         "redaction" => redaction,
         "audio_metrics" => audio_metrics,
         "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
       }

       data = audio
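For context, a minimal sketch of a synchronous recognition call that exercises the two new parameters; the client construction follows the SDK's usual pattern, and `"{apikey}"` and `audio-file.wav` are placeholders:

```ruby
require "ibm_watson"

authenticator = IBMWatson::Authenticators::IamAuthenticator.new(apikey: "{apikey}")
speech_to_text = IBMWatson::SpeechToTextV1.new(authenticator: authenticator)

# "audio-file.wav" is a placeholder for a local recording.
File.open("audio-file.wav") do |audio_file|
  response = speech_to_text.recognize(
    audio: audio_file,
    content_type: "audio/wav",
    speech_detector_sensitivity: 0.4,   # slightly stricter than the 0.5 default
    background_audio_suppression: 0.5   # moderate suppression; default 0.0 is off
  )
  puts response.result["results"]
end
```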
@@ -439,7 +478,7 @@ module IBMWatson
     end

     ##
-    # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
     # Sends audio for speech recognition using web sockets.
     # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
     # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
@@ -531,6 +570,32 @@ module IBMWatson
     #
     #   See [Split transcript at phrase
     #   end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    #   the parameter to suppress word insertions from music, coughing, and other
+    #   non-speech events. The service biases the audio it passes for speech recognition
+    #   by evaluating the input audio against prior models of speech and non-speech
+    #   activity.
+    #
+    #   Specify a value between 0.0 and 1.0:
+    #   * 0.0 suppresses all audio (no speech is transcribed).
+    #   * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    #   * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    #   to prevent it from being transcribed as speech. Use the parameter to suppress side
+    #   conversations or background noise.
+    #
+    #   Specify a value in the range of 0.0 to 1.0:
+    #   * 0.0 (the default) provides no suppression (background audio suppression is
+    #   disabled).
+    #   * 0.5 provides a reasonable level of audio suppression for general usage.
+    #   * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
     def recognize_using_websocket(
       content_type: nil,
       recognize_callback:,
@@ -559,7 +624,9 @@ module IBMWatson
       processing_metrics_interval: nil,
       audio_metrics: nil,
       end_of_phrase_silence_time: nil,
-      split_transcript_at_phrase_end: nil
+      split_transcript_at_phrase_end: nil,
+      speech_detector_sensitivity: nil,
+      background_audio_suppression: nil
     )
       raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
       raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
@@ -599,7 +666,9 @@ module IBMWatson
         "processing_metrics_interval" => processing_metrics_interval,
         "audio_metrics" => audio_metrics,
         "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
       }
       options.delete_if { |_, v| v.nil? }
       WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
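A sketch of the same parameters over the WebSocket interface, reusing the `speech_to_text` client from the previous sketch; the callback method signatures (keyword arguments `transcription:` and `error:`) reflect the SDK's `RecognizeCallback` base class and should be checked against the installed version:

```ruby
# Placeholder callback; override only the events you care about.
class MyRecognizeCallback < IBMWatson::RecognizeCallback
  def on_transcription(transcription:)
    puts transcription
  end

  def on_error(error:)
    warn "Error: #{error}"
  end
end

File.open("audio-file.wav") do |audio_file|
  ws = speech_to_text.recognize_using_websocket(
    audio: audio_file,
    recognize_callback: MyRecognizeCallback.new,
    content_type: "audio/wav",
    speech_detector_sensitivity: 0.4,
    background_audio_suppression: 0.5
  )
  ws.start   # blocks until the connection closes; wrap in a Thread if needed
end
```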
@@ -717,7 +786,7 @@ module IBMWatson
     end

     ##
-    # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
     # Create a job.
     # Creates a job for a new asynchronous recognition request. The job is owned by the
     # instance of the service whose credentials are used to create it. How you learn the
@@ -903,8 +972,14 @@ module IBMWatson
     # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
     #   one or more string tokens. Keywords are spotted only in the final results, not in
     #   interim hypotheses. If you specify any keywords, you must also specify a keywords
-    #   threshold.
-    #
+    #   threshold. Omit the parameter or specify an empty array if you do not need to spot
+    #   keywords.
+    #
+    #   You can spot a maximum of 1000 keywords with a single request. A single keyword
+    #   can have a maximum length of 1024 characters, though the maximum effective length
+    #   for double-byte languages might be shorter. Keywords are case-insensitive.
+    #
+    #   See [Keyword
     #   spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
     # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
     #   considered to match a keyword if its confidence is greater than or equal to the
@@ -949,11 +1024,11 @@ module IBMWatson
     #   parameter to be `true`, regardless of whether you specify `false` for the
     #   parameter.
     #
-    #   **Note:** Applies to US English, Japanese, and Spanish (both
-    #   narrowband models) and UK English (narrowband model) transcription
-    #   determine whether a language model supports speaker labels, you can also
-    #   **Get a model** method and check that the attribute `speaker_labels` is
-    #   `true`.
+    #   **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
+    #   broadband and narrowband models) and UK English (narrowband model) transcription
+    #   only. To determine whether a language model supports speaker labels, you can also
+    #   use the **Get a model** method and check that the attribute `speaker_labels` is
+    #   set to `true`.
     #
     #   See [Speaker
     #   labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -1036,8 +1111,33 @@ module IBMWatson
     #
     #   See [Split transcript at phrase
     #   end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    #   the parameter to suppress word insertions from music, coughing, and other
+    #   non-speech events. The service biases the audio it passes for speech recognition
+    #   by evaluating the input audio against prior models of speech and non-speech
+    #   activity.
+    #
+    #   Specify a value between 0.0 and 1.0:
+    #   * 0.0 suppresses all audio (no speech is transcribed).
+    #   * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    #   * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    #   to prevent it from being transcribed as speech. Use the parameter to suppress side
+    #   conversations or background noise.
+    #
+    #   Specify a value in the range of 0.0 to 1.0:
+    #   * 0.0 (the default) provides no suppression (background audio suppression is
+    #   disabled).
+    #   * 0.5 provides a reasonable level of audio suppression for general usage.
+    #   * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
     # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-    def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
       raise ArgumentError.new("audio must be provided") if audio.nil?

       headers = {
@@ -1074,7 +1174,9 @@ module IBMWatson
         "processing_metrics_interval" => processing_metrics_interval,
         "audio_metrics" => audio_metrics,
         "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
       }

       data = audio
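A sketch of the asynchronous path with the new parameters, polling rather than registering a callback URL; the `speech_to_text` client and the audio file are assumed from the earlier sketches:

```ruby
File.open("audio-file.mp3") do |audio_file|
  job = speech_to_text.create_job(
    audio: audio_file,
    content_type: "audio/mp3",
    speech_detector_sensitivity: 0.6,
    background_audio_suppression: 0.3
  ).result

  # No callback_url registered, so poll until the job finishes.
  loop do
    status = speech_to_text.check_job(id: job["id"]).result
    break if status["status"] == "completed"
    sleep 5
  end
end
```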
@@ -1600,18 +1702,20 @@ module IBMWatson
     #
     # The call returns an HTTP 201 response code if the corpus is valid. The service
     # then asynchronously processes the contents of the corpus and automatically
-    # extracts new words that it finds. This can take on the order of
-    # complete depending on the total number of words and the number of new words in
-    # corpus, as well as the current load on the service. You cannot submit requests
-    # add additional resources to the custom model or to train the model until the
+    # extracts new words that it finds. This operation can take on the order of minutes
+    # to complete depending on the total number of words and the number of new words in
+    # the corpus, as well as the current load on the service. You cannot submit requests
+    # to add additional resources to the custom model or to train the model until the
     # service's analysis of the corpus for the current request completes. Use the **List
     # a corpus** method to check the status of the analysis.
     #
     # The service auto-populates the model's words resource with words from the corpus
-    # that are not found in its base vocabulary. These are referred to as
-    # out-of-vocabulary (OOV) words.
-    #
-    #
+    # that are not found in its base vocabulary. These words are referred to as
+    # out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
+    # resource to ensure that each OOV word's definition is complete and valid. You can
+    # use the **List custom words** method to examine the words resource. You can use
+    # other words method to eliminate typos and modify how words are pronounced as
+    # needed.
     #
     # To add a corpus file that has the same name as an existing corpus, set the
     # `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
@@ -1628,10 +1732,12 @@ module IBMWatson
     #   directly.
     #
     # **See also:**
+    # * [Add a corpus to the custom language
+    # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
     # * [Working with
     # corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
-    # * [
-    #
+    # * [Validating a words
+    # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
     # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
     #   the request. You must make the request with credentials for the instance of the
     #   service that owns the custom model.
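The newly documented validation workflow, as a sketch: add a corpus, then inspect the OOV words it produced. The `speech_to_text` client is assumed from earlier, and `customization_id` and `corpus.txt` are placeholders:

```ruby
File.open("corpus.txt") do |corpus_file|
  speech_to_text.add_corpus(
    customization_id: customization_id,
    corpus_name: "corpus-1",
    corpus_file: corpus_file
  )
end

# Once analysis completes, review the OOV words the corpus introduced.
words = speech_to_text.list_words(
  customization_id: customization_id,
  word_type: "corpora"
).result
puts words["words"]
```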
@@ -1860,7 +1966,10 @@ module IBMWatson
     #   the parameter for words that are difficult to pronounce, foreign words, acronyms,
     #   and so on. For example, you might specify that the word `IEEE` can sound like `i
     #   triple e`. You can specify a maximum of five sounds-like pronunciations for a
-    #   word.
+    #   word. If you omit the `sounds_like` field, the service attempts to set the field
+    #   to its pronunciation of the word. It cannot generate a pronunciation for all
+    #   words, so you must review the word's definition to ensure that it is complete and
+    #   valid.
     # * The `display_as` field provides a different way of spelling the word in a
     #   transcript. Use the parameter when you want the word to appear different from its
     #   usual representation or from its spelling in training data. For example, you might
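A sketch of the `sounds_like`/review cycle described above, using the `IEEE` example from the docs; `customization_id` is a placeholder:

```ruby
speech_to_text.add_word(
  customization_id: customization_id,
  word_name: "IEEE",
  sounds_like: ["i triple e"],   # up to five pronunciations per word
  display_as: "IEEE"
)

# Review the stored definition, since a generated sounds_like may need correction.
word = speech_to_text.get_word(
  customization_id: customization_id,
  word_name: "IEEE"
).result
puts word["sounds_like"]
```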
@@ -1890,10 +1999,12 @@ module IBMWatson
     #
     #
     # **See also:**
+    # * [Add words to the custom language
+    # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
     # * [Working with custom
     # words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
-    # * [
-    #
+    # * [Validating a words
+    # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
     # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
     #   the request. You must make the request with credentials for the instance of the
     #   service that owns the custom model.
@@ -1949,7 +2060,10 @@ module IBMWatson
     #   the parameter for words that are difficult to pronounce, foreign words, acronyms,
     #   and so on. For example, you might specify that the word `IEEE` can sound like `i
     #   triple e`. You can specify a maximum of five sounds-like pronunciations for a
-    #   word.
+    #   word. If you omit the `sounds_like` field, the service attempts to set the field
+    #   to its pronunciation of the word. It cannot generate a pronunciation for all
+    #   words, so you must review the word's definition to ensure that it is complete and
+    #   valid.
     # * The `display_as` field provides a different way of spelling the word in a
     #   transcript. Use the parameter when you want the word to appear different from its
     #   usual representation or from its spelling in training data. For example, you might
|
|
1961
2075
|
# the **List a custom word** method to review the word that you add.
|
1962
2076
|
#
|
1963
2077
|
# **See also:**
|
2078
|
+
# * [Add words to the custom language
|
2079
|
+
# model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
|
1964
2080
|
# * [Working with custom
|
1965
2081
|
# words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
|
1966
|
-
# * [
|
1967
|
-
#
|
2082
|
+
# * [Validating a words
|
2083
|
+
# resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
|
1968
2084
|
# @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
|
1969
2085
|
# the request. You must make the request with credentials for the instance of the
|
1970
2086
|
# service that owns the custom model.
|
@@ -2148,12 +2264,12 @@ module IBMWatson
     #
     # The call returns an HTTP 201 response code if the grammar is valid. The service
     # then asynchronously processes the contents of the grammar and automatically
-    # extracts new words that it finds. This can take a few seconds
-    # depending on the size and complexity of the grammar, as well as the
-    # on the service. You cannot submit requests to add additional
-    # custom model or to train the model until the service's analysis
-    # the current request completes. Use the **Get a grammar** method
-    # status of the analysis.
+    # extracts new words that it finds. This operation can take a few seconds or minutes
+    # to complete depending on the size and complexity of the grammar, as well as the
+    # current load on the service. You cannot submit requests to add additional
+    # resources to the custom model or to train the model until the service's analysis
+    # of the grammar for the current request completes. Use the **Get a grammar** method
+    # to check the status of the analysis.
     #
     # The service populates the model's words resource with any word that is recognized
     # by the grammar that is not found in the model's base vocabulary. These are
@@ -2500,7 +2616,7 @@ module IBMWatson
     # to complete depending on the total amount of audio data on which the custom
     # acoustic model is being trained and the current load on the service. Typically,
     # training a custom acoustic model takes approximately two to four times the length
-    # of its audio data. The
+    # of its audio data. The actual time depends on the model being trained and the
     # nature of the audio, such as whether the audio is clean or noisy. The method
     # returns an HTTP 200 response code to indicate that the training process has begun.
     #
@@ -2519,8 +2635,9 @@ module IBMWatson
     # Train with a custom language model if you have verbatim transcriptions of the
     # audio files that you have added to the custom model or you have either corpora
     # (text files) or a list of words that are relevant to the contents of the audio
-    # files.
-    # base model
+    # files. For training to succeed, both of the custom models must be based on the
+    # same version of the same base model, and the custom language model must be fully
+    # trained and available.
     #
     # **See also:**
     # * [Train the custom acoustic
@@ -2536,6 +2653,9 @@ module IBMWatson
     #   another training request or a request to add audio resources to the model.
     # * The custom model contains less than 10 minutes or more than 200 hours of audio
     #   data.
+    # * You passed a custom language model with the `custom_language_model_id` query
+    #   parameter that is not in the available state. A custom language model must be
+    #   fully trained and available to be used to train a custom acoustic model.
     # * You passed an incompatible custom language model with the
     #   `custom_language_model_id` query parameter. Both custom models must be based on
     #   the same version of the same base model.
@@ -2551,8 +2671,8 @@ module IBMWatson
     #   been trained with verbatim transcriptions of the audio resources or that contains
     #   words that are relevant to the contents of the audio resources. The custom
     #   language model must be based on the same version of the same base model as the
-    #   custom acoustic model
-    #   custom models.
+    #   custom acoustic model, and the custom language model must be fully trained and
+    #   available. The credentials specified with the request must own both custom models.
     # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
     def train_acoustic_model(customization_id:, custom_language_model_id: nil)
       raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
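A sketch of the precondition spelled out above, with placeholder customization IDs:

```ruby
# The custom language model must already be fully trained and available.
speech_to_text.train_acoustic_model(
  customization_id: acoustic_customization_id,
  custom_language_model_id: language_customization_id
)
```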
@@ -2650,8 +2770,9 @@ module IBMWatson
     #   service that owns the custom model.
     # @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
     #   customization ID (GUID) of that custom language model. The custom language model
-    #   must be upgraded before the custom acoustic model can be upgraded. The
-    #
+    #   must be upgraded before the custom acoustic model can be upgraded. The custom
+    #   language model must be fully trained and available. The credentials specified with
+    #   the request must own both custom models.
     # @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
     #   has been modified since it was last trained. Use this parameter only to force the
     #   upgrade of a custom acoustic model that is trained with a custom language model,
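A sketch of the upgrade ordering described above, assuming both custom models are owned by the requesting credentials:

```ruby
# Upgrade the language model first, then the acoustic model trained with it.
speech_to_text.upgrade_language_model(customization_id: language_customization_id)
speech_to_text.upgrade_acoustic_model(
  customization_id: acoustic_customization_id,
  custom_language_model_id: language_customization_id
)
```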
@@ -2746,14 +2867,14 @@ module IBMWatson
     #   same name as an existing audio resource, set the `allow_overwrite` parameter to
     #   `true`; otherwise, the request fails.
     #
-    #   The method is asynchronous. It can take several seconds to complete
-    #   the duration of the audio and, in the case of an archive file, the
-    #   audio files being processed. The service returns a 201 response
-    #   is valid. It then asynchronously analyzes the contents of the
-    #   and automatically extracts information about the audio such as
-    #   sampling rate, and encoding. You cannot submit requests to train or
-    #   model until the service's analysis of all audio resources for current
-    #   completes.
+    #   The method is asynchronous. It can take several seconds or minutes to complete
+    #   depending on the duration of the audio and, in the case of an archive file, the
+    #   total number of audio files being processed. The service returns a 201 response
+    #   code if the audio is valid. It then asynchronously analyzes the contents of the
+    #   audio file or files and automatically extracts information about the audio such as
+    #   its length, sampling rate, and encoding. You cannot submit requests to train or
+    #   upgrade the model until the service's analysis of all audio resources for current
+    #   requests completes.
     #
     #   To determine the status of the service's analysis of the audio, use the **Get an
     #   audio resource** method to poll the status of the audio. The method accepts the
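A sketch of the add-then-poll workflow the docs describe; the file and resource names are placeholders, and the status value shown is illustrative:

```ruby
File.open("audio-resource.wav") do |audio_file|
  speech_to_text.add_audio(
    customization_id: acoustic_customization_id,
    audio_name: "audio-1",
    audio_resource: audio_file,
    content_type: "audio/wav"
  )
end

# Poll the analysis status before training or upgrading the model.
audio = speech_to_text.get_audio(
  customization_id: acoustic_customization_id,
  audio_name: "audio-1"
).result
puts audio["status"]   # "being_processed" while analysis is in progress
```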
|