ibm_watson 1.3.1 → 1.4.0

lib/ibm_watson/personality_insights_v3.rb
@@ -47,6 +47,8 @@ module IBMWatson
   # The Personality Insights V3 service.
   class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
     include Concurrent::Async
+    DEFAULT_SERVICE_NAME = "personality_insights"
+    DEFAULT_SERVICE_URL = "https://gateway.watsonplatform.net/personality-insights/api"
     ##
     # @!method initialize(args)
     # Construct a new client for the Personality Insights service.
@@ -65,19 +67,23 @@ module IBMWatson
     # @option args service_url [String] The base service URL to use when contacting the service.
     #   The base service_url may differ between IBM Cloud regions.
     # @option args authenticator [Object] The Authenticator instance to be configured for this service.
+    # @option args service_name [String] The name of the service to configure. Will be used as the key to load
+    #   any external configuration, if applicable.
     def initialize(args = {})
      @__async_initialized__ = false
      defaults = {}
      defaults[:version] = nil
-     defaults[:service_url] = "https://gateway.watsonplatform.net/personality-insights/api"
+     defaults[:service_url] = DEFAULT_SERVICE_URL
+     defaults[:service_name] = DEFAULT_SERVICE_NAME
      defaults[:authenticator] = nil
+     user_service_url = args[:service_url] unless args[:service_url].nil?
      args = defaults.merge(args)
      @version = args[:version]
      raise ArgumentError.new("version must be provided") if @version.nil?

-     args[:service_name] = "personality_insights"
      args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
      super
+     @service_url = user_service_url unless user_service_url.nil?
    end

    #########################
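The behavioral change worth noting: an explicitly passed `service_url` is now captured before the defaults merge and re-applied after `super`, so configuration loaded by the SDK core under the service name can no longer clobber it. A minimal usage sketch (not part of the diff; the apikey and regional endpoint are placeholders, and `Authenticators::IamAuthenticator` follows the gem's README pattern):

```ruby
require "ibm_watson/authenticators"
require "ibm_watson/personality_insights_v3"

# Placeholder credentials; any IAM apikey for the service works here.
authenticator = IBMWatson::Authenticators::IamAuthenticator.new(apikey: "{apikey}")

personality_insights = IBMWatson::PersonalityInsightsV3.new(
  version: "2017-10-13",
  authenticator: authenticator,
  # As of 1.4.0 this explicit URL survives `super`, even if external
  # configuration for "personality_insights" specifies a different URL.
  service_url: "https://gateway-lon.watsonplatform.net/personality-insights/api"
)
```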
lib/ibm_watson/speech_to_text_v1.rb
@@ -34,9 +34,9 @@
 # is a formal language specification that lets you restrict the phrases that the service
 # can recognize.
 #
-# Language model customization is generally available for production use with most
-# supported languages. Acoustic model customization is beta functionality that is
-# available for all supported languages.
+# Language model customization and acoustic model customization are generally available
+# for production use with all language models that are generally available. Grammars are
+# beta functionality for all language models that support language model customization.

 require "concurrent"
 require "erb"
@@ -50,6 +50,8 @@ module IBMWatson
   # The Speech to Text V1 service.
   class SpeechToTextV1 < IBMCloudSdkCore::BaseService
     include Concurrent::Async
+    DEFAULT_SERVICE_NAME = "speech_to_text"
+    DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"
     ##
     # @!method initialize(args)
     # Construct a new client for the Speech to Text service.
@@ -58,15 +60,19 @@ module IBMWatson
     # @option args service_url [String] The base service URL to use when contacting the service.
     #   The base service_url may differ between IBM Cloud regions.
     # @option args authenticator [Object] The Authenticator instance to be configured for this service.
+    # @option args service_name [String] The name of the service to configure. Will be used as the key to load
+    #   any external configuration, if applicable.
    def initialize(args = {})
      @__async_initialized__ = false
      defaults = {}
-     defaults[:service_url] = "https://stream.watsonplatform.net/speech-to-text/api"
+     defaults[:service_url] = DEFAULT_SERVICE_URL
+     defaults[:service_name] = DEFAULT_SERVICE_NAME
      defaults[:authenticator] = nil
+     user_service_url = args[:service_url] unless args[:service_url].nil?
      args = defaults.merge(args)
-     args[:service_name] = "speech_to_text"
      args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
      super
+     @service_url = user_service_url unless user_service_url.nil?
    end

    #########################
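The new `service_name` option is the key under which the SDK core looks up external configuration when no authenticator is passed. A sketch, assuming the ibm-cloud-sdk-core convention of environment variables (or an `ibm-credentials.env` file) prefixed with the upper-cased service name; the `MY_STT_*` names and values are hypothetical:

```ruby
require "ibm_watson/speech_to_text_v1"

# Hypothetical external configuration, keyed by the custom service name.
ENV["MY_STT_AUTH_TYPE"] = "iam"
ENV["MY_STT_APIKEY"] = "{apikey}" # placeholder
ENV["MY_STT_URL"] = "https://stream-fra.watsonplatform.net/speech-to-text/api"

# With no explicit authenticator, ConfigBasedAuthenticatorFactory resolves
# credentials under "my_stt" rather than the default "speech_to_text".
speech_to_text = IBMWatson::SpeechToTextV1.new(service_name: "my_stt")
```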
@@ -135,7 +141,7 @@ module IBMWatson
    #########################

    ##
-    # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
    # Recognize audio.
    # Sends audio and returns transcription results for a recognition request. You can
    # pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
@@ -277,8 +283,14 @@ module IBMWatson
    # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
    #   one or more string tokens. Keywords are spotted only in the final results, not in
    #   interim hypotheses. If you specify any keywords, you must also specify a keywords
-    #   threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
-    #   an empty array if you do not need to spot keywords. See [Keyword
+    #   threshold. Omit the parameter or specify an empty array if you do not need to spot
+    #   keywords.
+    #
+    #   You can spot a maximum of 1000 keywords with a single request. A single keyword
+    #   can have a maximum length of 1024 characters, though the maximum effective length
+    #   for double-byte languages might be shorter. Keywords are case-insensitive.
+    #
+    #   See [Keyword
    #   spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
    # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
    #   considered to match a keyword if its confidence is greater than or equal to the
@@ -323,11 +335,11 @@ module IBMWatson
    #   parameter to be `true`, regardless of whether you specify `false` for the
    #   parameter.
    #
-    #   **Note:** Applies to US English, Japanese, and Spanish (both broadband and
-    #   narrowband models) and UK English (narrowband model) transcription only. To
-    #   determine whether a language model supports speaker labels, you can also use the
-    #   **Get a model** method and check that the attribute `speaker_labels` is set to
-    #   `true`.
+    #   **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
+    #   broadband and narrowband models) and UK English (narrowband model) transcription
+    #   only. To determine whether a language model supports speaker labels, you can also
+    #   use the **Get a model** method and check that the attribute `speaker_labels` is
+    #   set to `true`.
    #
    #   See [Speaker
    #   labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -388,8 +400,33 @@ module IBMWatson
    #
    #   See [Split transcript at phrase
    #   end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    #   the parameter to suppress word insertions from music, coughing, and other
+    #   non-speech events. The service biases the audio it passes for speech recognition
+    #   by evaluating the input audio against prior models of speech and non-speech
+    #   activity.
+    #
+    #   Specify a value between 0.0 and 1.0:
+    #   * 0.0 suppresses all audio (no speech is transcribed).
+    #   * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    #   * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    #   to prevent it from being transcribed as speech. Use the parameter to suppress side
+    #   conversations or background noise.
+    #
+    #   Specify a value in the range of 0.0 to 1.0:
+    #   * 0.0 (the default) provides no suppression (background audio suppression is
+    #   disabled).
+    #   * 0.5 provides a reasonable level of audio suppression for general usage.
+    #   * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
    # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-    def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
      raise ArgumentError.new("audio must be provided") if audio.nil?

      headers = {
@@ -420,7 +457,9 @@ module IBMWatson
        "redaction" => redaction,
        "audio_metrics" => audio_metrics,
        "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
      }

      data = audio
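A short call sketch exercising the two new parameters; the file name and tuning values are illustrative only:

```ruby
File.open("audio-with-hum.flac") do |audio_file|
  response = speech_to_text.recognize(
    audio: audio_file,
    content_type: "audio/flac",
    speech_detector_sensitivity: 0.6,   # a bit more eager to accept audio as speech
    background_audio_suppression: 0.5   # moderate suppression of background noise
  )
  puts response.result["results"]
end
```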
@@ -439,7 +478,7 @@ module IBMWatson
    end

    ##
-    # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
    # Sends audio for speech recognition using web sockets.
    # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
    # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
@@ -531,6 +570,32 @@ module IBMWatson
    #
    #   See [Split transcript at phrase
    #   end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    #   the parameter to suppress word insertions from music, coughing, and other
+    #   non-speech events. The service biases the audio it passes for speech recognition
+    #   by evaluating the input audio against prior models of speech and non-speech
+    #   activity.
+    #
+    #   Specify a value between 0.0 and 1.0:
+    #   * 0.0 suppresses all audio (no speech is transcribed).
+    #   * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    #   * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    #   to prevent it from being transcribed as speech. Use the parameter to suppress side
+    #   conversations or background noise.
+    #
+    #   Specify a value in the range of 0.0 to 1.0:
+    #   * 0.0 (the default) provides no suppression (background audio suppression is
+    #   disabled).
+    #   * 0.5 provides a reasonable level of audio suppression for general usage.
+    #   * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
    def recognize_using_websocket(
      content_type: nil,
      recognize_callback:,
@@ -559,7 +624,9 @@ module IBMWatson
      processing_metrics_interval: nil,
      audio_metrics: nil,
      end_of_phrase_silence_time: nil,
-      split_transcript_at_phrase_end: nil
+      split_transcript_at_phrase_end: nil,
+      speech_detector_sensitivity: nil,
+      background_audio_suppression: nil
    )
      raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
      raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
@@ -599,7 +666,9 @@ module IBMWatson
        "processing_metrics_interval" => processing_metrics_interval,
        "audio_metrics" => audio_metrics,
        "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
      }
      options.delete_if { |_, v| v.nil? }
      WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
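On the WebSocket path, the same two parameters flow through the `options` hash above. A sketch following the gem's README callback pattern (the callback class, file name, and values are illustrative):

```ruby
class MyRecognizeCallback < IBMWatson::RecognizeCallback
  def on_transcription(transcript)
    puts transcript
  end

  def on_error(error)
    puts "Error received: #{error}"
  end
end

File.open("audio-sample.wav") do |audio_file|
  speech_to_text.recognize_using_websocket(
    audio: audio_file,
    recognize_callback: MyRecognizeCallback.new,
    content_type: "audio/wav",
    speech_detector_sensitivity: 0.6,
    background_audio_suppression: 0.5
  ).start
end
```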
@@ -717,7 +786,7 @@ module IBMWatson
    end

    ##
-    # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
    # Create a job.
    # Creates a job for a new asynchronous recognition request. The job is owned by the
    # instance of the service whose credentials are used to create it. How you learn the
@@ -903,8 +972,14 @@ module IBMWatson
    # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
    #   one or more string tokens. Keywords are spotted only in the final results, not in
    #   interim hypotheses. If you specify any keywords, you must also specify a keywords
-    #   threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
-    #   an empty array if you do not need to spot keywords. See [Keyword
+    #   threshold. Omit the parameter or specify an empty array if you do not need to spot
+    #   keywords.
+    #
+    #   You can spot a maximum of 1000 keywords with a single request. A single keyword
+    #   can have a maximum length of 1024 characters, though the maximum effective length
+    #   for double-byte languages might be shorter. Keywords are case-insensitive.
+    #
+    #   See [Keyword
    #   spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
    # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
    #   considered to match a keyword if its confidence is greater than or equal to the
@@ -949,11 +1024,11 @@ module IBMWatson
    #   parameter to be `true`, regardless of whether you specify `false` for the
    #   parameter.
    #
-    #   **Note:** Applies to US English, Japanese, and Spanish (both broadband and
-    #   narrowband models) and UK English (narrowband model) transcription only. To
-    #   determine whether a language model supports speaker labels, you can also use the
-    #   **Get a model** method and check that the attribute `speaker_labels` is set to
-    #   `true`.
+    #   **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
+    #   broadband and narrowband models) and UK English (narrowband model) transcription
+    #   only. To determine whether a language model supports speaker labels, you can also
+    #   use the **Get a model** method and check that the attribute `speaker_labels` is
+    #   set to `true`.
    #
    #   See [Speaker
    #   labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -1036,8 +1111,33 @@ module IBMWatson
    #
    #   See [Split transcript at phrase
    #   end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
+    # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
+    #   the parameter to suppress word insertions from music, coughing, and other
+    #   non-speech events. The service biases the audio it passes for speech recognition
+    #   by evaluating the input audio against prior models of speech and non-speech
+    #   activity.
+    #
+    #   Specify a value between 0.0 and 1.0:
+    #   * 0.0 suppresses all audio (no speech is transcribed).
+    #   * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
+    #   * 1.0 suppresses no audio (speech detection sensitivity is disabled).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
+    # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
+    #   to prevent it from being transcribed as speech. Use the parameter to suppress side
+    #   conversations or background noise.
+    #
+    #   Specify a value in the range of 0.0 to 1.0:
+    #   * 0.0 (the default) provides no suppression (background audio suppression is
+    #   disabled).
+    #   * 0.5 provides a reasonable level of audio suppression for general usage.
+    #   * 1.0 suppresses all audio (no audio is transcribed).
+    #
+    #   The values increase on a monotonic curve. See [Speech Activity
+    #   Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
    # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-    def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
+    def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
      raise ArgumentError.new("audio must be provided") if audio.nil?

      headers = {
@@ -1074,7 +1174,9 @@ module IBMWatson
        "processing_metrics_interval" => processing_metrics_interval,
        "audio_metrics" => audio_metrics,
        "end_of_phrase_silence_time" => end_of_phrase_silence_time,
-        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
+        "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
+        "speech_detector_sensitivity" => speech_detector_sensitivity,
+        "background_audio_suppression" => background_audio_suppression
      }

      data = audio
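The asynchronous `create_job` path accepts the same two new parameters. A polling sketch (no `callback_url`, so results are retrieved with `check_job`; file name and values are illustrative):

```ruby
File.open("meeting.mp3") do |audio_file|
  job = speech_to_text.create_job(
    audio: audio_file,
    content_type: "audio/mp3",
    speech_detector_sensitivity: 0.5,
    background_audio_suppression: 0.7
  ).result

  # Poll until the asynchronous recognition finishes.
  until job["status"] == "completed"
    sleep(10)
    job = speech_to_text.check_job(id: job["id"]).result
  end
  puts job["results"]
end
```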
@@ -1600,18 +1702,20 @@ module IBMWatson
    #
    # The call returns an HTTP 201 response code if the corpus is valid. The service
    # then asynchronously processes the contents of the corpus and automatically
-    # extracts new words that it finds. This can take on the order of a minute or two to
-    # complete depending on the total number of words and the number of new words in the
-    # corpus, as well as the current load on the service. You cannot submit requests to
-    # add additional resources to the custom model or to train the model until the
+    # extracts new words that it finds. This operation can take on the order of minutes
+    # to complete depending on the total number of words and the number of new words in
+    # the corpus, as well as the current load on the service. You cannot submit requests
+    # to add additional resources to the custom model or to train the model until the
    # service's analysis of the corpus for the current request completes. Use the **List
    # a corpus** method to check the status of the analysis.
    #
    # The service auto-populates the model's words resource with words from the corpus
-    # that are not found in its base vocabulary. These are referred to as
-    # out-of-vocabulary (OOV) words. You can use the **List custom words** method to
-    # examine the words resource. You can use other words method to eliminate typos and
-    # modify how words are pronounced as needed.
+    # that are not found in its base vocabulary. These words are referred to as
+    # out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
+    # resource to ensure that each OOV word's definition is complete and valid. You can
+    # use the **List custom words** method to examine the words resource. You can use
+    # other words method to eliminate typos and modify how words are pronounced as
+    # needed.
    #
    # To add a corpus file that has the same name as an existing corpus, set the
    # `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
@@ -1628,10 +1732,12 @@ module IBMWatson
    # directly.
    #
    # **See also:**
+    # * [Add a corpus to the custom language
+    #   model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
    # * [Working with
    #   corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
-    # * [Add a corpus to the custom language
-    #   model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus).
+    # * [Validating a words
+    #   resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
    # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
    #   the request. You must make the request with credentials for the instance of the
    #   service that owns the custom model.
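A workflow sketch matching the revised guidance: add the corpus, poll until analysis finishes, then list the OOV words so the words resource can be validated (IDs and names are placeholders):

```ruby
File.open("corpus1.txt") do |corpus_file|
  speech_to_text.add_corpus(
    customization_id: "{customization_id}",
    corpus_name: "corpus1",
    corpus_file: corpus_file
  )
end

# Analysis is asynchronous; wait for the corpus to reach "analyzed".
loop do
  corpus = speech_to_text.get_corpus(
    customization_id: "{customization_id}", corpus_name: "corpus1"
  ).result
  break if corpus["status"] == "analyzed"
  sleep(5)
end

# Review the auto-populated out-of-vocabulary words.
words = speech_to_text.list_words(
  customization_id: "{customization_id}", word_type: "corpora"
).result
puts words["words"]
```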
@@ -1860,7 +1966,10 @@ module IBMWatson
    #   the parameter for words that are difficult to pronounce, foreign words, acronyms,
    #   and so on. For example, you might specify that the word `IEEE` can sound like `i
    #   triple e`. You can specify a maximum of five sounds-like pronunciations for a
-    #   word.
+    #   word. If you omit the `sounds_like` field, the service attempts to set the field
+    #   to its pronunciation of the word. It cannot generate a pronunciation for all
+    #   words, so you must review the word's definition to ensure that it is complete and
+    #   valid.
    #   * The `display_as` field provides a different way of spelling the word in a
    #   transcript. Use the parameter when you want the word to appear different from its
    #   usual representation or from its spelling in training data. For example, you might
@@ -1890,10 +1999,12 @@ module IBMWatson
    #
    #
    # **See also:**
+    # * [Add words to the custom language
+    #   model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
    # * [Working with custom
    #   words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
-    # * [Add words to the custom language
-    #   model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords).
+    # * [Validating a words
+    #   resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
    # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
    #   the request. You must make the request with credentials for the instance of the
    #   service that owns the custom model.
@@ -1949,7 +2060,10 @@ module IBMWatson
    #   the parameter for words that are difficult to pronounce, foreign words, acronyms,
    #   and so on. For example, you might specify that the word `IEEE` can sound like `i
    #   triple e`. You can specify a maximum of five sounds-like pronunciations for a
-    #   word.
+    #   word. If you omit the `sounds_like` field, the service attempts to set the field
+    #   to its pronunciation of the word. It cannot generate a pronunciation for all
+    #   words, so you must review the word's definition to ensure that it is complete and
+    #   valid.
    #   * The `display_as` field provides a different way of spelling the word in a
    #   transcript. Use the parameter when you want the word to appear different from its
    #   usual representation or from its spelling in training data. For example, you might
@@ -1961,10 +2075,12 @@ module IBMWatson
    #   the **List a custom word** method to review the word that you add.
    #
    # **See also:**
+    # * [Add words to the custom language
+    #   model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
    # * [Working with custom
    #   words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
-    # * [Add words to the custom language
-    #   model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords).
+    # * [Validating a words
+    #   resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
    # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
    #   the request. You must make the request with credentials for the instance of the
    #   service that owns the custom model.
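Per the new `sounds_like` caveat, defining the pronunciation explicitly (and reviewing the stored word) avoids relying on a service-generated one. A sketch with placeholder IDs:

```ruby
speech_to_text.add_word(
  customization_id: "{customization_id}",
  word_name: "IEEE",
  sounds_like: ["i triple e"],
  display_as: "IEEE"
)

# Confirm the definition is complete and valid, as the docs now require.
word = speech_to_text.get_word(
  customization_id: "{customization_id}", word_name: "IEEE"
).result
puts word
```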
@@ -2148,12 +2264,12 @@ module IBMWatson
    #
    # The call returns an HTTP 201 response code if the grammar is valid. The service
    # then asynchronously processes the contents of the grammar and automatically
-    # extracts new words that it finds. This can take a few seconds to complete
-    # depending on the size and complexity of the grammar, as well as the current load
-    # on the service. You cannot submit requests to add additional resources to the
-    # custom model or to train the model until the service's analysis of the grammar for
-    # the current request completes. Use the **Get a grammar** method to check the
-    # status of the analysis.
+    # extracts new words that it finds. This operation can take a few seconds or minutes
+    # to complete depending on the size and complexity of the grammar, as well as the
+    # current load on the service. You cannot submit requests to add additional
+    # resources to the custom model or to train the model until the service's analysis
+    # of the grammar for the current request completes. Use the **Get a grammar** method
+    # to check the status of the analysis.
    #
    # The service populates the model's words resource with any word that is recognized
    # by the grammar that is not found in the model's base vocabulary. These are
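Grammar analysis is likewise asynchronous and, per the revised wording, can now take minutes. A polling sketch with placeholder names (assuming the `application/srgs` content type for an ABNF grammar):

```ruby
File.open("confirm.abnf") do |grammar_file|
  speech_to_text.add_grammar(
    customization_id: "{customization_id}",
    grammar_name: "confirm-abnf",
    grammar_file: grammar_file,
    content_type: "application/srgs"
  )
end

# Wait for the service to finish analyzing the grammar.
loop do
  grammar = speech_to_text.get_grammar(
    customization_id: "{customization_id}", grammar_name: "confirm-abnf"
  ).result
  break if grammar["status"] == "analyzed"
  sleep(5)
end
```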
@@ -2500,7 +2616,7 @@ module IBMWatson
    #   to complete depending on the total amount of audio data on which the custom
    #   acoustic model is being trained and the current load on the service. Typically,
    #   training a custom acoustic model takes approximately two to four times the length
-    #   of its audio data. The range of time depends on the model being trained and the
+    #   of its audio data. The actual time depends on the model being trained and the
    #   nature of the audio, such as whether the audio is clean or noisy. The method
    #   returns an HTTP 200 response code to indicate that the training process has begun.
    #
@@ -2519,8 +2635,9 @@ module IBMWatson
    # Train with a custom language model if you have verbatim transcriptions of the
    # audio files that you have added to the custom model or you have either corpora
    # (text files) or a list of words that are relevant to the contents of the audio
-    # files. Both of the custom models must be based on the same version of the same
-    # base model for training to succeed.
+    # files. For training to succeed, both of the custom models must be based on the
+    # same version of the same base model, and the custom language model must be fully
+    # trained and available.
    #
    # **See also:**
    # * [Train the custom acoustic
@@ -2536,6 +2653,9 @@ module IBMWatson
    # another training request or a request to add audio resources to the model.
    # * The custom model contains less than 10 minutes or more than 200 hours of audio
    # data.
+    # * You passed a custom language model with the `custom_language_model_id` query
+    # parameter that is not in the available state. A custom language model must be
+    # fully trained and available to be used to train a custom acoustic model.
    # * You passed an incompatible custom language model with the
    # `custom_language_model_id` query parameter. Both custom models must be based on
    # the same version of the same base model.
@@ -2551,8 +2671,8 @@ module IBMWatson
    #   been trained with verbatim transcriptions of the audio resources or that contains
    #   words that are relevant to the contents of the audio resources. The custom
    #   language model must be based on the same version of the same base model as the
-    #   custom acoustic model. The credentials specified with the request must own both
-    #   custom models.
+    #   custom acoustic model, and the custom language model must be fully trained and
+    #   available. The credentials specified with the request must own both custom models.
    # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
    def train_acoustic_model(customization_id:, custom_language_model_id: nil)
      raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
@@ -2650,8 +2770,9 @@ module IBMWatson
    #   service that owns the custom model.
    # @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
    #   customization ID (GUID) of that custom language model. The custom language model
-    #   must be upgraded before the custom acoustic model can be upgraded. The credentials
-    #   specified with the request must own both custom models.
+    #   must be upgraded before the custom acoustic model can be upgraded. The custom
+    #   language model must be fully trained and available. The credentials specified with
+    #   the request must own both custom models.
    # @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
    #   has been modified since it was last trained. Use this parameter only to force the
    #   upgrade of a custom acoustic model that is trained with a custom language model,
@@ -2746,14 +2867,14 @@ module IBMWatson
    # same name as an existing audio resource, set the `allow_overwrite` parameter to
    # `true`; otherwise, the request fails.
    #
-    # The method is asynchronous. It can take several seconds to complete depending on
-    # the duration of the audio and, in the case of an archive file, the total number of
-    # audio files being processed. The service returns a 201 response code if the audio
-    # is valid. It then asynchronously analyzes the contents of the audio file or files
-    # and automatically extracts information about the audio such as its length,
-    # sampling rate, and encoding. You cannot submit requests to train or upgrade the
-    # model until the service's analysis of all audio resources for current requests
-    # completes.
+    # The method is asynchronous. It can take several seconds or minutes to complete
+    # depending on the duration of the audio and, in the case of an archive file, the
+    # total number of audio files being processed. The service returns a 201 response
+    # code if the audio is valid. It then asynchronously analyzes the contents of the
+    # audio file or files and automatically extracts information about the audio such as
+    # its length, sampling rate, and encoding. You cannot submit requests to train or
+    # upgrade the model until the service's analysis of all audio resources for current
+    # requests completes.
    #
    # To determine the status of the service's analysis of the audio, use the **Get an
    # audio resource** method to poll the status of the audio. The method accepts the
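A closing sketch of the audio-resource workflow described above: add the audio, then poll until the service's analysis reaches `ok` before training or upgrading (placeholder IDs and file names):

```ruby
File.open("audio1.wav") do |audio_file|
  speech_to_text.add_audio(
    customization_id: "{acoustic_customization_id}",
    audio_name: "audio1",
    audio_resource: audio_file,
    content_type: "audio/wav"
  )
end

# Analysis is asynchronous; wait until the resource is usable for training.
loop do
  audio = speech_to_text.get_audio(
    customization_id: "{acoustic_customization_id}", audio_name: "audio1"
  ).result
  break if audio["status"] == "ok"
  sleep(5)
end
```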