ibm_watson 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -47,6 +47,8 @@ module IBMWatson
47
47
  # The Personality Insights V3 service.
48
48
  class PersonalityInsightsV3 < IBMCloudSdkCore::BaseService
49
49
  include Concurrent::Async
50
+ DEFAULT_SERVICE_NAME = "personality_insights"
51
+ DEFAULT_SERVICE_URL = "https://gateway.watsonplatform.net/personality-insights/api"
50
52
  ##
51
53
  # @!method initialize(args)
52
54
  # Construct a new client for the Personality Insights service.
@@ -65,19 +67,23 @@ module IBMWatson
65
67
  # @option args service_url [String] The base service URL to use when contacting the service.
66
68
  # The base service_url may differ between IBM Cloud regions.
67
69
  # @option args authenticator [Object] The Authenticator instance to be configured for this service.
70
+ # @option args service_name [String] The name of the service to configure. Will be used as the key to load
71
+ # any external configuration, if applicable.
68
72
  def initialize(args = {})
69
73
  @__async_initialized__ = false
70
74
  defaults = {}
71
75
  defaults[:version] = nil
72
- defaults[:service_url] = "https://gateway.watsonplatform.net/personality-insights/api"
76
+ defaults[:service_url] = DEFAULT_SERVICE_URL
77
+ defaults[:service_name] = DEFAULT_SERVICE_NAME
73
78
  defaults[:authenticator] = nil
79
+ user_service_url = args[:service_url] unless args[:service_url].nil?
74
80
  args = defaults.merge(args)
75
81
  @version = args[:version]
76
82
  raise ArgumentError.new("version must be provided") if @version.nil?
77
83
 
78
- args[:service_name] = "personality_insights"
79
84
  args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
80
85
  super
86
+ @service_url = user_service_url unless user_service_url.nil?
81
87
  end
82
88
 
83
89
  #########################
@@ -34,9 +34,9 @@
34
34
  # is a formal language specification that lets you restrict the phrases that the service
35
35
  # can recognize.
36
36
  #
37
- # Language model customization is generally available for production use with most
38
- # supported languages. Acoustic model customization is beta functionality that is
39
- # available for all supported languages.
37
+ # Language model customization and acoustic model customization are generally available
38
+ # for production use with all language models that are generally available. Grammars are
39
+ # beta functionality for all language models that support language model customization.
40
40
 
41
41
  require "concurrent"
42
42
  require "erb"
@@ -50,6 +50,8 @@ module IBMWatson
50
50
  # The Speech to Text V1 service.
51
51
  class SpeechToTextV1 < IBMCloudSdkCore::BaseService
52
52
  include Concurrent::Async
53
+ DEFAULT_SERVICE_NAME = "speech_to_text"
54
+ DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"
53
55
  ##
54
56
  # @!method initialize(args)
55
57
  # Construct a new client for the Speech to Text service.
@@ -58,15 +60,19 @@ module IBMWatson
58
60
  # @option args service_url [String] The base service URL to use when contacting the service.
59
61
  # The base service_url may differ between IBM Cloud regions.
60
62
  # @option args authenticator [Object] The Authenticator instance to be configured for this service.
63
+ # @option args service_name [String] The name of the service to configure. Will be used as the key to load
64
+ # any external configuration, if applicable.
61
65
  def initialize(args = {})
62
66
  @__async_initialized__ = false
63
67
  defaults = {}
64
- defaults[:service_url] = "https://stream.watsonplatform.net/speech-to-text/api"
68
+ defaults[:service_url] = DEFAULT_SERVICE_URL
69
+ defaults[:service_name] = DEFAULT_SERVICE_NAME
65
70
  defaults[:authenticator] = nil
71
+ user_service_url = args[:service_url] unless args[:service_url].nil?
66
72
  args = defaults.merge(args)
67
- args[:service_name] = "speech_to_text"
68
73
  args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
69
74
  super
75
+ @service_url = user_service_url unless user_service_url.nil?
70
76
  end
71
77
 
72
78
  #########################
@@ -135,7 +141,7 @@ module IBMWatson
135
141
  #########################
136
142
 
137
143
  ##
138
- # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
144
+ # @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
139
145
  # Recognize audio.
140
146
  # Sends audio and returns transcription results for a recognition request. You can
141
147
  # pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
@@ -277,8 +283,14 @@ module IBMWatson
277
283
  # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
278
284
  # one or more string tokens. Keywords are spotted only in the final results, not in
279
285
  # interim hypotheses. If you specify any keywords, you must also specify a keywords
280
- # threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
281
- # an empty array if you do not need to spot keywords. See [Keyword
286
+ # threshold. Omit the parameter or specify an empty array if you do not need to spot
287
+ # keywords.
288
+ #
289
+ # You can spot a maximum of 1000 keywords with a single request. A single keyword
290
+ # can have a maximum length of 1024 characters, though the maximum effective length
291
+ # for double-byte languages might be shorter. Keywords are case-insensitive.
292
+ #
293
+ # See [Keyword
282
294
  # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
283
295
  # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
284
296
  # considered to match a keyword if its confidence is greater than or equal to the
@@ -323,11 +335,11 @@ module IBMWatson
323
335
  # parameter to be `true`, regardless of whether you specify `false` for the
324
336
  # parameter.
325
337
  #
326
- # **Note:** Applies to US English, Japanese, and Spanish (both broadband and
327
- # narrowband models) and UK English (narrowband model) transcription only. To
328
- # determine whether a language model supports speaker labels, you can also use the
329
- # **Get a model** method and check that the attribute `speaker_labels` is set to
330
- # `true`.
338
+ # **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
339
+ # broadband and narrowband models) and UK English (narrowband model) transcription
340
+ # only. To determine whether a language model supports speaker labels, you can also
341
+ # use the **Get a model** method and check that the attribute `speaker_labels` is
342
+ # set to `true`.
331
343
  #
332
344
  # See [Speaker
333
345
  # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -388,8 +400,33 @@ module IBMWatson
388
400
  #
389
401
  # See [Split transcript at phrase
390
402
  # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
403
+ # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
404
+ # the parameter to suppress word insertions from music, coughing, and other
405
+ # non-speech events. The service biases the audio it passes for speech recognition
406
+ # by evaluating the input audio against prior models of speech and non-speech
407
+ # activity.
408
+ #
409
+ # Specify a value between 0.0 and 1.0:
410
+ # * 0.0 suppresses all audio (no speech is transcribed).
411
+ # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
412
+ # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
413
+ #
414
+ # The values increase on a monotonic curve. See [Speech Activity
415
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
416
+ # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
417
+ # to prevent it from being transcribed as speech. Use the parameter to suppress side
418
+ # conversations or background noise.
419
+ #
420
+ # Specify a value in the range of 0.0 to 1.0:
421
+ # * 0.0 (the default) provides no suppression (background audio suppression is
422
+ # disabled).
423
+ # * 0.5 provides a reasonable level of audio suppression for general usage.
424
+ # * 1.0 suppresses all audio (no audio is transcribed).
425
+ #
426
+ # The values increase on a monotonic curve. See [Speech Activity
427
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
391
428
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
392
- def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
429
+ def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
393
430
  raise ArgumentError.new("audio must be provided") if audio.nil?
394
431
 
395
432
  headers = {
@@ -420,7 +457,9 @@ module IBMWatson
420
457
  "redaction" => redaction,
421
458
  "audio_metrics" => audio_metrics,
422
459
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
423
- "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
460
+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
461
+ "speech_detector_sensitivity" => speech_detector_sensitivity,
462
+ "background_audio_suppression" => background_audio_suppression
424
463
  }
425
464
 
426
465
  data = audio
@@ -439,7 +478,7 @@ module IBMWatson
439
478
  end
440
479
 
441
480
  ##
442
- # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
481
+ # @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
443
482
  # Sends audio for speech recognition using web sockets.
444
483
  # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
445
484
  # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
@@ -531,6 +570,32 @@ module IBMWatson
531
570
  #
532
571
  # See [Split transcript at phrase
533
572
  # end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
573
+ # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
574
+ # the parameter to suppress word insertions from music, coughing, and other
575
+ # non-speech events. The service biases the audio it passes for speech recognition
576
+ # by evaluating the input audio against prior models of speech and non-speech
577
+ # activity.
578
+ #
579
+ # Specify a value between 0.0 and 1.0:
580
+ # * 0.0 suppresses all audio (no speech is transcribed).
581
+ # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
582
+ # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
583
+ #
584
+ # The values increase on a monotonic curve. See [Speech Activity
585
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
586
+ # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
587
+ # to prevent it from being transcribed as speech. Use the parameter to suppress side
588
+ # conversations or background noise.
589
+ #
590
+ # Specify a value in the range of 0.0 to 1.0:
591
+ # * 0.0 (the default) provides no suppression (background audio suppression is
592
+ # disabled).
593
+ # * 0.5 provides a reasonable level of audio suppression for general usage.
594
+ # * 1.0 suppresses all audio (no audio is transcribed).
595
+ #
596
+ # The values increase on a monotonic curve. See [Speech Activity
597
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
598
+ # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
534
599
  def recognize_using_websocket(
535
600
  content_type: nil,
536
601
  recognize_callback:,
@@ -559,7 +624,9 @@ module IBMWatson
559
624
  processing_metrics_interval: nil,
560
625
  audio_metrics: nil,
561
626
  end_of_phrase_silence_time: nil,
562
- split_transcript_at_phrase_end: nil
627
+ split_transcript_at_phrase_end: nil,
628
+ speech_detector_sensitivity: nil,
629
+ background_audio_suppression: nil
563
630
  )
564
631
  raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
565
632
  raise ArgumentError("Recognize callback must be provided") if recognize_callback.nil?
@@ -599,7 +666,9 @@ module IBMWatson
599
666
  "processing_metrics_interval" => processing_metrics_interval,
600
667
  "audio_metrics" => audio_metrics,
601
668
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
602
- "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
669
+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
670
+ "speech_detector_sensitivity" => speech_detector_sensitivity,
671
+ "background_audio_suppression" => background_audio_suppression
603
672
  }
604
673
  options.delete_if { |_, v| v.nil? }
605
674
  WebSocketClient.new(audio: audio, chunk_data: chunk_data, options: options, recognize_callback: recognize_callback, service_url: service_url, headers: headers, disable_ssl_verification: @disable_ssl_verification)
@@ -717,7 +786,7 @@ module IBMWatson
717
786
  end
718
787
 
719
788
  ##
720
- # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
789
+ # @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
721
790
  # Create a job.
722
791
  # Creates a job for a new asynchronous recognition request. The job is owned by the
723
792
  # instance of the service whose credentials are used to create it. How you learn the
@@ -903,8 +972,14 @@ module IBMWatson
903
972
  # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
904
973
  # one or more string tokens. Keywords are spotted only in the final results, not in
905
974
  # interim hypotheses. If you specify any keywords, you must also specify a keywords
906
- # threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
907
- # an empty array if you do not need to spot keywords. See [Keyword
975
+ # threshold. Omit the parameter or specify an empty array if you do not need to spot
976
+ # keywords.
977
+ #
978
+ # You can spot a maximum of 1000 keywords with a single request. A single keyword
979
+ # can have a maximum length of 1024 characters, though the maximum effective length
980
+ # for double-byte languages might be shorter. Keywords are case-insensitive.
981
+ #
982
+ # See [Keyword
908
983
  # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
909
984
  # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
910
985
  # considered to match a keyword if its confidence is greater than or equal to the
@@ -949,11 +1024,11 @@ module IBMWatson
949
1024
  # parameter to be `true`, regardless of whether you specify `false` for the
950
1025
  # parameter.
951
1026
  #
952
- # **Note:** Applies to US English, Japanese, and Spanish (both broadband and
953
- # narrowband models) and UK English (narrowband model) transcription only. To
954
- # determine whether a language model supports speaker labels, you can also use the
955
- # **Get a model** method and check that the attribute `speaker_labels` is set to
956
- # `true`.
1027
+ # **Note:** Applies to US English, German, Japanese, Korean, and Spanish (both
1028
+ # broadband and narrowband models) and UK English (narrowband model) transcription
1029
+ # only. To determine whether a language model supports speaker labels, you can also
1030
+ # use the **Get a model** method and check that the attribute `speaker_labels` is
1031
+ # set to `true`.
957
1032
  #
958
1033
  # See [Speaker
959
1034
  # labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#speaker_labels).
@@ -1036,8 +1111,33 @@ module IBMWatson
1036
1111
  #
1037
1112
  # See [Split transcript at phrase
1038
1113
  # end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#split_transcript).
1114
+ # @param speech_detector_sensitivity [Float] The sensitivity of speech activity detection that the service is to perform. Use
1115
+ # the parameter to suppress word insertions from music, coughing, and other
1116
+ # non-speech events. The service biases the audio it passes for speech recognition
1117
+ # by evaluating the input audio against prior models of speech and non-speech
1118
+ # activity.
1119
+ #
1120
+ # Specify a value between 0.0 and 1.0:
1121
+ # * 0.0 suppresses all audio (no speech is transcribed).
1122
+ # * 0.5 (the default) provides a reasonable compromise for the level of sensitivity.
1123
+ # * 1.0 suppresses no audio (speech detection sensitivity is disabled).
1124
+ #
1125
+ # The values increase on a monotonic curve. See [Speech Activity
1126
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
1127
+ # @param background_audio_suppression [Float] The level to which the service is to suppress background audio based on its volume
1128
+ # to prevent it from being transcribed as speech. Use the parameter to suppress side
1129
+ # conversations or background noise.
1130
+ #
1131
+ # Specify a value in the range of 0.0 to 1.0:
1132
+ # * 0.0 (the default) provides no suppression (background audio suppression is
1133
+ # disabled).
1134
+ # * 0.5 provides a reasonable level of audio suppression for general usage.
1135
+ # * 1.0 suppresses all audio (no audio is transcribed).
1136
+ #
1137
+ # The values increase on a monotonic curve. See [Speech Activity
1138
+ # Detection](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#detection).
1039
1139
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
1040
- def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
1140
+ def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil, speech_detector_sensitivity: nil, background_audio_suppression: nil)
1041
1141
  raise ArgumentError.new("audio must be provided") if audio.nil?
1042
1142
 
1043
1143
  headers = {
@@ -1074,7 +1174,9 @@ module IBMWatson
1074
1174
  "processing_metrics_interval" => processing_metrics_interval,
1075
1175
  "audio_metrics" => audio_metrics,
1076
1176
  "end_of_phrase_silence_time" => end_of_phrase_silence_time,
1077
- "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
1177
+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end,
1178
+ "speech_detector_sensitivity" => speech_detector_sensitivity,
1179
+ "background_audio_suppression" => background_audio_suppression
1078
1180
  }
1079
1181
 
1080
1182
  data = audio
@@ -1600,18 +1702,20 @@ module IBMWatson
1600
1702
  #
1601
1703
  # The call returns an HTTP 201 response code if the corpus is valid. The service
1602
1704
  # then asynchronously processes the contents of the corpus and automatically
1603
- # extracts new words that it finds. This can take on the order of a minute or two to
1604
- # complete depending on the total number of words and the number of new words in the
1605
- # corpus, as well as the current load on the service. You cannot submit requests to
1606
- # add additional resources to the custom model or to train the model until the
1705
+ # extracts new words that it finds. This operation can take on the order of minutes
1706
+ # to complete depending on the total number of words and the number of new words in
1707
+ # the corpus, as well as the current load on the service. You cannot submit requests
1708
+ # to add additional resources to the custom model or to train the model until the
1607
1709
  # service's analysis of the corpus for the current request completes. Use the **List
1608
1710
  # a corpus** method to check the status of the analysis.
1609
1711
  #
1610
1712
  # The service auto-populates the model's words resource with words from the corpus
1611
- # that are not found in its base vocabulary. These are referred to as
1612
- # out-of-vocabulary (OOV) words. You can use the **List custom words** method to
1613
- # examine the words resource. You can use other words method to eliminate typos and
1614
- # modify how words are pronounced as needed.
1713
+ # that are not found in its base vocabulary. These words are referred to as
1714
+ # out-of-vocabulary (OOV) words. After adding a corpus, you must validate the words
1715
+ # resource to ensure that each OOV word's definition is complete and valid. You can
1716
+ # use the **List custom words** method to examine the words resource. You can use
1717
+ other words methods to eliminate typos and modify how words are pronounced as
1718
+ # needed.
1615
1719
  #
1616
1720
  # To add a corpus file that has the same name as an existing corpus, set the
1617
1721
  # `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
@@ -1628,10 +1732,12 @@ module IBMWatson
1628
1732
  # directly.
1629
1733
  #
1630
1734
  # **See also:**
1735
+ # * [Add a corpus to the custom language
1736
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus)
1631
1737
  # * [Working with
1632
1738
  # corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora)
1633
- # * [Add a corpus to the custom language
1634
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus).
1739
+ # * [Validating a words
1740
+ # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
1635
1741
  # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
1636
1742
  # the request. You must make the request with credentials for the instance of the
1637
1743
  # service that owns the custom model.
@@ -1860,7 +1966,10 @@ module IBMWatson
1860
1966
  # the parameter for words that are difficult to pronounce, foreign words, acronyms,
1861
1967
  # and so on. For example, you might specify that the word `IEEE` can sound like `i
1862
1968
  # triple e`. You can specify a maximum of five sounds-like pronunciations for a
1863
- # word.
1969
+ # word. If you omit the `sounds_like` field, the service attempts to set the field
1970
+ # to its pronunciation of the word. It cannot generate a pronunciation for all
1971
+ # words, so you must review the word's definition to ensure that it is complete and
1972
+ # valid.
1864
1973
  # * The `display_as` field provides a different way of spelling the word in a
1865
1974
  # transcript. Use the parameter when you want the word to appear different from its
1866
1975
  # usual representation or from its spelling in training data. For example, you might
@@ -1890,10 +1999,12 @@ module IBMWatson
1890
1999
  #
1891
2000
  #
1892
2001
  # **See also:**
2002
+ # * [Add words to the custom language
2003
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
1893
2004
  # * [Working with custom
1894
2005
  # words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
1895
- # * [Add words to the custom language
1896
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords).
2006
+ # * [Validating a words
2007
+ # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
1897
2008
  # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
1898
2009
  # the request. You must make the request with credentials for the instance of the
1899
2010
  # service that owns the custom model.
@@ -1949,7 +2060,10 @@ module IBMWatson
1949
2060
  # the parameter for words that are difficult to pronounce, foreign words, acronyms,
1950
2061
  # and so on. For example, you might specify that the word `IEEE` can sound like `i
1951
2062
  # triple e`. You can specify a maximum of five sounds-like pronunciations for a
1952
- # word.
2063
+ # word. If you omit the `sounds_like` field, the service attempts to set the field
2064
+ # to its pronunciation of the word. It cannot generate a pronunciation for all
2065
+ # words, so you must review the word's definition to ensure that it is complete and
2066
+ # valid.
1953
2067
  # * The `display_as` field provides a different way of spelling the word in a
1954
2068
  # transcript. Use the parameter when you want the word to appear different from its
1955
2069
  # usual representation or from its spelling in training data. For example, you might
@@ -1961,10 +2075,12 @@ module IBMWatson
1961
2075
  # the **List a custom word** method to review the word that you add.
1962
2076
  #
1963
2077
  # **See also:**
2078
+ # * [Add words to the custom language
2079
+ # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords)
1964
2080
  # * [Working with custom
1965
2081
  # words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords)
1966
- # * [Add words to the custom language
1967
- # model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords).
2082
+ # * [Validating a words
2083
+ # resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel).
1968
2084
  # @param customization_id [String] The customization ID (GUID) of the custom language model that is to be used for
1969
2085
  # the request. You must make the request with credentials for the instance of the
1970
2086
  # service that owns the custom model.
@@ -2148,12 +2264,12 @@ module IBMWatson
2148
2264
  #
2149
2265
  # The call returns an HTTP 201 response code if the grammar is valid. The service
2150
2266
  # then asynchronously processes the contents of the grammar and automatically
2151
- # extracts new words that it finds. This can take a few seconds to complete
2152
- # depending on the size and complexity of the grammar, as well as the current load
2153
- # on the service. You cannot submit requests to add additional resources to the
2154
- # custom model or to train the model until the service's analysis of the grammar for
2155
- # the current request completes. Use the **Get a grammar** method to check the
2156
- # status of the analysis.
2267
+ # extracts new words that it finds. This operation can take a few seconds or minutes
2268
+ # to complete depending on the size and complexity of the grammar, as well as the
2269
+ # current load on the service. You cannot submit requests to add additional
2270
+ # resources to the custom model or to train the model until the service's analysis
2271
+ # of the grammar for the current request completes. Use the **Get a grammar** method
2272
+ # to check the status of the analysis.
2157
2273
  #
2158
2274
  # The service populates the model's words resource with any word that is recognized
2159
2275
  # by the grammar that is not found in the model's base vocabulary. These are
@@ -2500,7 +2616,7 @@ module IBMWatson
2500
2616
  # to complete depending on the total amount of audio data on which the custom
2501
2617
  # acoustic model is being trained and the current load on the service. Typically,
2502
2618
  # training a custom acoustic model takes approximately two to four times the length
2503
- # of its audio data. The range of time depends on the model being trained and the
2619
+ # of its audio data. The actual time depends on the model being trained and the
2504
2620
  # nature of the audio, such as whether the audio is clean or noisy. The method
2505
2621
  # returns an HTTP 200 response code to indicate that the training process has begun.
2506
2622
  #
@@ -2519,8 +2635,9 @@ module IBMWatson
2519
2635
  # Train with a custom language model if you have verbatim transcriptions of the
2520
2636
  # audio files that you have added to the custom model or you have either corpora
2521
2637
  # (text files) or a list of words that are relevant to the contents of the audio
2522
- # files. Both of the custom models must be based on the same version of the same
2523
- # base model for training to succeed.
2638
+ # files. For training to succeed, both of the custom models must be based on the
2639
+ # same version of the same base model, and the custom language model must be fully
2640
+ # trained and available.
2524
2641
  #
2525
2642
  # **See also:**
2526
2643
  # * [Train the custom acoustic
@@ -2536,6 +2653,9 @@ module IBMWatson
2536
2653
  # another training request or a request to add audio resources to the model.
2537
2654
  # * The custom model contains less than 10 minutes or more than 200 hours of audio
2538
2655
  # data.
2656
+ # * You passed a custom language model with the `custom_language_model_id` query
2657
+ # parameter that is not in the available state. A custom language model must be
2658
+ # fully trained and available to be used to train a custom acoustic model.
2539
2659
  # * You passed an incompatible custom language model with the
2540
2660
  # `custom_language_model_id` query parameter. Both custom models must be based on
2541
2661
  # the same version of the same base model.
@@ -2551,8 +2671,8 @@ module IBMWatson
2551
2671
  # been trained with verbatim transcriptions of the audio resources or that contains
2552
2672
  # words that are relevant to the contents of the audio resources. The custom
2553
2673
  # language model must be based on the same version of the same base model as the
2554
- # custom acoustic model. The credentials specified with the request must own both
2555
- # custom models.
2674
+ # custom acoustic model, and the custom language model must be fully trained and
2675
+ # available. The credentials specified with the request must own both custom models.
2556
2676
  # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
2557
2677
  def train_acoustic_model(customization_id:, custom_language_model_id: nil)
2558
2678
  raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
@@ -2650,8 +2770,9 @@ module IBMWatson
2650
2770
  # service that owns the custom model.
2651
2771
  # @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
2652
2772
  # customization ID (GUID) of that custom language model. The custom language model
2653
- # must be upgraded before the custom acoustic model can be upgraded. The credentials
2654
- # specified with the request must own both custom models.
2773
+ # must be upgraded before the custom acoustic model can be upgraded. The custom
2774
+ # language model must be fully trained and available. The credentials specified with
2775
+ # the request must own both custom models.
2655
2776
  # @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
2656
2777
  # has been modified since it was last trained. Use this parameter only to force the
2657
2778
  # upgrade of a custom acoustic model that is trained with a custom language model,
@@ -2746,14 +2867,14 @@ module IBMWatson
2746
2867
  # same name as an existing audio resource, set the `allow_overwrite` parameter to
2747
2868
  # `true`; otherwise, the request fails.
2748
2869
  #
2749
- # The method is asynchronous. It can take several seconds to complete depending on
2750
- # the duration of the audio and, in the case of an archive file, the total number of
2751
- # audio files being processed. The service returns a 201 response code if the audio
2752
- # is valid. It then asynchronously analyzes the contents of the audio file or files
2753
- # and automatically extracts information about the audio such as its length,
2754
- # sampling rate, and encoding. You cannot submit requests to train or upgrade the
2755
- # model until the service's analysis of all audio resources for current requests
2756
- # completes.
2870
+ # The method is asynchronous. It can take several seconds or minutes to complete
2871
+ # depending on the duration of the audio and, in the case of an archive file, the
2872
+ # total number of audio files being processed. The service returns a 201 response
2873
+ # code if the audio is valid. It then asynchronously analyzes the contents of the
2874
+ # audio file or files and automatically extracts information about the audio such as
2875
+ # its length, sampling rate, and encoding. You cannot submit requests to train or
2876
+ # upgrade the model until the service's analysis of all audio resources for current
2877
+ # requests completes.
2757
2878
  #
2758
2879
  # To determine the status of the service's analysis of the audio, use the **Get an
2759
2880
  # audio resource** method to poll the status of the audio. The method accepts the