google-cloud-speech-v2 0.1.0 → 0.3.0

@@ -275,22 +275,44 @@ module Google
  # When using this model, the service will stop transcribing audio after the
  # first utterance is detected and completed.
  #
- # When using this model,
- # {::Google::Cloud::Speech::V2::RecognitionFeatures::MultiChannelMode::SEPARATE_RECOGNITION_PER_CHANNEL SEPARATE_RECOGNITION_PER_CHANNEL}
- # is not supported; multi-channel audio is accepted, but only the first
- # channel will be processed and transcribed.
+ # When using this model,
+ # {::Google::Cloud::Speech::V2::RecognitionFeatures::MultiChannelMode::SEPARATE_RECOGNITION_PER_CHANNEL SEPARATE_RECOGNITION_PER_CHANNEL}
+ # is not supported; multi-channel audio is accepted, but only the first
+ # channel will be processed and transcribed.
+ #
+ # - `telephony`
+ #
+ # Best for audio that originated from a phone call (typically recorded at
+ # an 8khz sampling rate).
+ #
+ # - `medical_conversation`
+ #
+ # For conversations between a medical provider—for example, a doctor or
+ # nurse—and a patient. Use this model when both a provider and a patient
+ # are speaking. Words uttered by each speaker are automatically detected
+ # and labeled in the returned transcript.
+ #
+ # For supported features please see [medical models
+ # documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
+ #
+ # - `medical_dictation`
+ #
+ # For dictated notes spoken by a single medical provider—for example, a
+ # doctor dictating notes about a patient's blood test results.
+ #
+ # For supported features please see [medical models
+ # documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
+ #
+ # - `usm`
+ #
+ # The next generation of Speech-to-Text models from Google.
  # @!attribute [rw] language_codes
  # @return [::Array<::String>]
  # Required. The language of the supplied audio as a
  # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  #
- # Supported languages:
- #
- # - `en-US`
- #
- # - `en-GB`
- #
- # - `fr-FR`
+ # Supported languages for each model are listed at:
+ # https://cloud.google.com/speech-to-text/docs/languages
  #
  # If additional languages are provided, recognition result will contain
  # recognition in the most likely language detected. The recognition result
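
As a rough illustration of the model and language documentation above, a v2 recognition request in Ruby might look like the following minimal sketch. The project, recognizer path, and audio file name are placeholder assumptions.

    require "google/cloud/speech/v2"

    # Minimal sketch: transcribe a short phone-call recording with the
    # "telephony" model. Assumes a recognizer resource already exists and
    # credentials are configured in the environment.
    client = Google::Cloud::Speech::V2::Speech::Client.new

    config = Google::Cloud::Speech::V2::RecognitionConfig.new(
      model:                "telephony",
      language_codes:       ["en-US"],   # see the languages page referenced above
      auto_decoding_config: Google::Cloud::Speech::V2::AutoDetectDecodingConfig.new
    )

    response = client.recognize(
      recognizer: "projects/my-project/locations/global/recognizers/my-recognizer",
      config:     config,
      content:    File.binread("call.wav")
    )
    response.results.each { |result| puts result.alternatives.first&.transcript }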
@@ -373,14 +395,23 @@ module Google
 
  # Automatically detected decoding parameters.
  # Supported for the following encodings:
+ #
  # * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
+ #
  # * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
+ #
  # * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
+ #
  # * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
+ #
  # * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
+ #
  # * FLAC: FLAC frames in the "native FLAC" container format.
+ #
  # * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
+ #
  # * OGG_OPUS: Opus audio frames in an Ogg container.
+ #
  # * WEBM_OPUS: Opus audio frames in a WebM container.
  class AutoDetectDecodingConfig
  include ::Google::Protobuf::MessageExts
@@ -398,16 +429,24 @@ module Google
  # sampling rate of the audio source to 16000 Hz. If that's not possible, use
  # the native sample rate of the audio source (instead of re-sampling).
  # Supported for the following encodings:
+ #
  # * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
+ #
  # * MULAW: Headerless 8-bit companded mulaw samples.
+ #
  # * ALAW: Headerless 8-bit companded alaw samples.
  # @!attribute [rw] audio_channel_count
  # @return [::Integer]
  # Number of channels present in the audio data sent for recognition.
  # Supported for the following encodings:
+ #
  # * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
+ #
  # * MULAW: Headerless 8-bit companded mulaw samples.
+ #
  # * ALAW: Headerless 8-bit companded alaw samples.
+ #
+ # The maximum allowed value is 8.
  class ExplicitDecodingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
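
Where the audio format is known in advance, the explicit decoding fields documented above can be set directly. A minimal sketch with illustrative values (headerless 16-bit PCM at 16 kHz, one channel):

    # Explicit decoding parameters; audio_channel_count is capped at 8 per the
    # documentation above.
    explicit = Google::Cloud::Speech::V2::ExplicitDecodingConfig.new(
      encoding:            :LINEAR16,
      sample_rate_hertz:   16_000,
      audio_channel_count: 1
    )

    config = Google::Cloud::Speech::V2::RecognitionConfig.new(
      explicit_decoding_config: explicit,
      model:                    "long",      # illustrative model choice
      language_codes:           ["en-US"]
    )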
@@ -433,7 +472,7 @@ module Google
  # @return [::Integer]
  # Required. Minimum number of speakers in the conversation. This range gives
  # you more flexibility by allowing the system to automatically determine the
- # correct number of speakers. If not set, the default value is 2.
+ # correct number of speakers.
  #
  # To fix the number of speakers detected in the audio, set
  # `min_speaker_count` = `max_speaker_count`.
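
A minimal sketch of the guidance above, fixing the detected speaker count at exactly two by setting the minimum and maximum to the same value:

    diarization = Google::Cloud::Speech::V2::SpeakerDiarizationConfig.new(
      min_speaker_count: 2,
      max_speaker_count: 2
    )

    # Attach it to the recognition features of a RecognitionConfig.
    features = Google::Cloud::Speech::V2::RecognitionFeatures.new(
      diarization_config: diarization
    )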
@@ -521,28 +560,28 @@ module Google
  end
 
  # Provides "hints" to the speech recognizer to favor specific words and phrases
- # in the results. Phrase sets can be specified as an inline resource, or a
- # reference to an existing phrase set resource.
+ # in the results. PhraseSets can be specified as an inline resource, or a
+ # reference to an existing PhraseSet resource.
  # @!attribute [rw] phrase_sets
  # @return [::Array<::Google::Cloud::Speech::V2::SpeechAdaptation::AdaptationPhraseSet>]
- # A list of inline or referenced phrase sets.
+ # A list of inline or referenced PhraseSets.
  # @!attribute [rw] custom_classes
  # @return [::Array<::Google::Cloud::Speech::V2::CustomClass>]
- # A list of inline custom classes. Existing custom class resources can be
- # referenced directly in a phrase set.
+ # A list of inline CustomClasses. Existing CustomClass resources can be
+ # referenced directly in a PhraseSet.
  class SpeechAdaptation
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
 
- # A biasing phrase set, which can be either a string referencing the name of
- # an existing phrase set resource, or an inline definition of a phrase set.
+ # A biasing PhraseSet, which can be either a string referencing the name of
+ # an existing PhraseSets resource, or an inline definition of a PhraseSet.
  # @!attribute [rw] phrase_set
  # @return [::String]
- # The name of an existing phrase set resource. The user must have read
+ # The name of an existing PhraseSet resource. The user must have read
  # access to the resource and it must not be deleted.
  # @!attribute [rw] inline_phrase_set
  # @return [::Google::Cloud::Speech::V2::PhraseSet]
- # An inline defined phrase set.
+ # An inline defined PhraseSet.
  class AdaptationPhraseSet
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
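
A minimal sketch of the adaptation types above: one referenced PhraseSet resource plus one inline PhraseSet. The resource name and phrase values are placeholders.

    adaptation = Google::Cloud::Speech::V2::SpeechAdaptation.new(
      phrase_sets: [
        # Reference an existing PhraseSet resource by name.
        Google::Cloud::Speech::V2::SpeechAdaptation::AdaptationPhraseSet.new(
          phrase_set: "projects/my-project/locations/global/phraseSets/product-names"
        ),
        # Or define a PhraseSet inline.
        Google::Cloud::Speech::V2::SpeechAdaptation::AdaptationPhraseSet.new(
          inline_phrase_set: Google::Cloud::Speech::V2::PhraseSet.new(
            phrases: [
              Google::Cloud::Speech::V2::PhraseSet::Phrase.new(value: "Speech-to-Text", boost: 10.0)
            ]
          )
        )
      ]
    )
    # The adaptation is then assigned to RecognitionConfig#adaptation.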
@@ -651,9 +690,9 @@ module Google
  # @!attribute [rw] words
  # @return [::Array<::Google::Cloud::Speech::V2::WordInfo>]
  # A list of word-specific information for each recognized word.
- # When
- # [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
- # is true, you will see all the words from the beginning of the audio.
+ # When the
+ # {::Google::Cloud::Speech::V2::SpeakerDiarizationConfig SpeakerDiarizationConfig}
+ # is set, you will see all the words from the beginning of the audio.
  class SpeechRecognitionAlternative
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -694,8 +733,8 @@ module Google
  # A distinct label is assigned for every speaker within the audio. This field
  # specifies which one of those speakers was detected to have spoken this
  # word. `speaker_label` is set if
- # [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
- # is `true` and only in the top alternative.
+ # {::Google::Cloud::Speech::V2::SpeakerDiarizationConfig SpeakerDiarizationConfig}
+ # is given and only in the top alternative.
  class WordInfo
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -795,9 +834,9 @@ module Google
  # of the recognizer during this recognition request. If no mask is provided,
  # all non-default valued fields in
  # {::Google::Cloud::Speech::V2::StreamingRecognitionConfig#config config} override
- # the values in the recognizer for this recognition request. If a mask is
+ # the values in the Recognizer for this recognition request. If a mask is
  # provided, only the fields listed in the mask override the config in the
- # recognizer for this recognition request. If a wildcard (`*`) is provided,
+ # Recognizer for this recognition request. If a wildcard (`*`) is provided,
  # {::Google::Cloud::Speech::V2::StreamingRecognitionConfig#config config}
  # completely overrides and replaces the config in the recognizer for this
  # recognition request.
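
A minimal sketch of the mask behavior described above: override only `language_codes` from the stored Recognizer for one streaming session, leaving its other settings intact.

    streaming_config = Google::Cloud::Speech::V2::StreamingRecognitionConfig.new(
      config: Google::Cloud::Speech::V2::RecognitionConfig.new(
        language_codes: ["fr-FR"]
      ),
      # Only the field named here overrides the Recognizer; use ["*"] to
      # replace the stored config entirely.
      config_mask: Google::Protobuf::FieldMask.new(paths: ["language_codes"])
    )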
@@ -841,6 +880,7 @@ module Google
  # @!attribute [rw] audio
  # @return [::String]
  # Inline audio bytes to be Recognized.
+ # Maximum size for this field is 15 KB per request.
  class StreamingRecognizeRequest
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
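
A minimal sketch of streaming within the 15 KB per-request limit on `audio`: send the configuration in the first request, then the audio in chunks. The `client`, `recognizer_path`, and `streaming_config` variables are assumed from the earlier sketches, and the file name is a placeholder.

    chunk_size = 15 * 1024  # stay under the 15 KB cap noted above

    requests = Enumerator.new do |yielder|
      # First request carries the recognizer and streaming configuration.
      yielder << Google::Cloud::Speech::V2::StreamingRecognizeRequest.new(
        recognizer:       recognizer_path,
        streaming_config: streaming_config
      )
      # Subsequent requests carry only audio bytes.
      File.open("call.wav", "rb") do |file|
        while (chunk = file.read(chunk_size))
          yielder << Google::Cloud::Speech::V2::StreamingRecognizeRequest.new(audio: chunk)
        end
      end
    end

    client.streaming_recognize(requests).each do |response|
      response.results.each { |result| puts result.alternatives.first&.transcript }
    end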
@@ -878,17 +918,57 @@ module Google
  # @!attribute [rw] files
  # @return [::Array<::Google::Cloud::Speech::V2::BatchRecognizeFileMetadata>]
  # Audio files with file metadata for ASR.
+ # The maximum number of files allowed to be specified is 5.
+ # @!attribute [rw] recognition_output_config
+ # @return [::Google::Cloud::Speech::V2::RecognitionOutputConfig]
+ # Configuration options for where to output the transcripts of each file.
  class BatchRecognizeRequest
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
  end
 
+ # Output configurations for Cloud Storage.
+ # @!attribute [rw] uri
+ # @return [::String]
+ # The Cloud Storage URI prefix with which recognition results will be
+ # written.
+ class GcsOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # Output configurations for inline response.
+ class InlineOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # Configuration options for the output(s) of recognition.
+ # @!attribute [rw] gcs_output_config
+ # @return [::Google::Cloud::Speech::V2::GcsOutputConfig]
+ # If this message is populated, recognition results are written to the
+ # provided Google Cloud Storage URI.
+ # @!attribute [rw] inline_response_config
+ # @return [::Google::Cloud::Speech::V2::InlineOutputConfig]
+ # If this message is populated, recognition results are provided in the
+ # {::Google::Cloud::Speech::V2::BatchRecognizeResponse BatchRecognizeResponse}
+ # message of the Operation when completed. This is only supported when
+ # calling {::Google::Cloud::Speech::V2::Speech::Client#batch_recognize BatchRecognize}
+ # with just one audio file.
+ class RecognitionOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
  # Response message for
  # {::Google::Cloud::Speech::V2::Speech::Client#batch_recognize BatchRecognize} that is
  # packaged into a longrunning {::Google::Longrunning::Operation Operation}.
  # @!attribute [rw] results
  # @return [::Google::Protobuf::Map{::String => ::Google::Cloud::Speech::V2::BatchRecognizeFileResult}]
  # Map from filename to the final result for that file.
+ # @!attribute [rw] total_billed_duration
+ # @return [::Google::Protobuf::Duration]
+ # When available, billed audio seconds for the corresponding request.
  class BatchRecognizeResponse
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
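
A minimal sketch of the new batch fields above: recognize one Cloud Storage file (up to 5 may be listed) and write transcripts to a Cloud Storage prefix. Bucket and object names are placeholders, and `client`, `recognizer_path`, and `config` are assumed from the earlier sketches.

    operation = client.batch_recognize(
      recognizer: recognizer_path,
      config:     config,
      files: [
        Google::Cloud::Speech::V2::BatchRecognizeFileMetadata.new(
          uri: "gs://my-bucket/audio/call.wav"
        )
      ],
      recognition_output_config: Google::Cloud::Speech::V2::RecognitionOutputConfig.new(
        gcs_output_config: Google::Cloud::Speech::V2::GcsOutputConfig.new(
          uri: "gs://my-bucket/transcripts/"
        )
      )
    )

    operation.wait_until_done!
    puts operation.response.results.keys unless operation.error?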
@@ -903,13 +983,36 @@ module Google
  end
  end
 
+ # Output type for Cloud Storage of BatchRecognize transcripts. Though this
+ # proto isn't returned in this API anywhere, the Cloud Storage transcripts will
+ # be this proto serialized and should be parsed as such.
+ # @!attribute [rw] results
+ # @return [::Array<::Google::Cloud::Speech::V2::SpeechRecognitionResult>]
+ # Sequential list of transcription results corresponding to sequential
+ # portions of audio.
+ # @!attribute [rw] metadata
+ # @return [::Google::Cloud::Speech::V2::RecognitionResponseMetadata]
+ # Metadata about the recognition.
+ class BatchRecognizeResults
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
  # Final results for a single file.
  # @!attribute [rw] uri
  # @return [::String]
- # The GCS URI to which recognition results were written.
+ # The Cloud Storage URI to which recognition results were written.
  # @!attribute [rw] error
  # @return [::Google::Rpc::Status]
  # Error if one was encountered.
+ # @!attribute [rw] metadata
+ # @return [::Google::Cloud::Speech::V2::RecognitionResponseMetadata]
+ # @!attribute [rw] transcript
+ # @return [::Google::Cloud::Speech::V2::BatchRecognizeResults]
+ # The transcript for the audio file. This is populated only when
+ # {::Google::Cloud::Speech::V2::InlineOutputConfig InlineOutputConfig} is set in
+ # the
+ # [RecognitionOutputConfig][[google.cloud.speech.v2.RecognitionOutputConfig].
  class BatchRecognizeFileResult
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -925,7 +1028,7 @@ module Google
  # Error if one was encountered.
  # @!attribute [rw] uri
  # @return [::String]
- # The GCS URI to which recognition results will be written.
+ # The Cloud Storage URI to which recognition results will be written.
  class BatchRecognizeTranscriptionMetadata
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
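
A minimal sketch of reading one of those Cloud Storage transcript objects back as a BatchRecognizeResults message. This assumes the google-cloud-storage gem, a placeholder object name, and that the output was serialized as JSON; for binary protobuf output, `decode` would be used instead of `decode_json`.

    require "google/cloud/storage"

    storage = Google::Cloud::Storage.new
    data    = storage.bucket("my-bucket")
                     .file("transcripts/call_transcript.json")
                     .download.read

    results = Google::Cloud::Speech::V2::BatchRecognizeResults.decode_json(data)
    results.results.each { |result| puts result.alternatives.first&.transcript }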
@@ -1290,7 +1393,8 @@ module Google
  # phrase will be recognized over other similar sounding phrases. The higher
  # the boost, the higher the chance of false positive recognition as well.
  # Valid `boost` values are between 0 (exclusive) and 20. We recommend using a
- # binary search approach to finding the optimal value for your use case.
+ # binary search approach to finding the optimal value for your use case as
+ # well as adding phrases both with and without boost to your requests.
  # @!attribute [rw] display_name
  # @return [::String]
  # User-settable, human-readable name for the PhraseSet. Must be 63
@@ -1358,11 +1462,11 @@ module Google
  # be recognized over other similar sounding phrases. The higher the boost,
  # the higher the chance of false positive recognition as well. Negative
  # boost values would correspond to anti-biasing. Anti-biasing is not
- # enabled, so negative boost will simply be ignored. Though `boost` can
- # accept a wide range of positive values, most use cases are best served
- # with values between 0 and 20. We recommend using a binary search approach
- # to finding the optimal value for your use case. Speech recognition
- # will skip PhraseSets with a boost value of 0.
+ # enabled, so negative boost values will return an error. Boost values must
+ # be between 0 and 20. Any values outside that range will return an error.
+ # We recommend using a binary search approach to finding the optimal value
+ # for your use case as well as adding phrases both with and without boost
+ # to your requests.
  class Phrase
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
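
A minimal sketch of the boost guidance above, mixing boosted and un-boosted phrases in one PhraseSet (the terms are illustrative):

    phrase_set = Google::Cloud::Speech::V2::PhraseSet.new(
      phrases: [
        Google::Cloud::Speech::V2::PhraseSet::Phrase.new(value: "BigQuery", boost: 15.0),
        Google::Cloud::Speech::V2::PhraseSet::Phrase.new(value: "Dataflow")  # no boost
      ]
    )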
@@ -28,12 +28,14 @@ module Google
  # [API Design Guide](https://cloud.google.com/apis/design/errors).
  # @!attribute [rw] code
  # @return [::Integer]
- # The status code, which should be an enum value of [google.rpc.Code][google.rpc.Code].
+ # The status code, which should be an enum value of
+ # [google.rpc.Code][google.rpc.Code].
  # @!attribute [rw] message
  # @return [::String]
  # A developer-facing error message, which should be in English. Any
  # user-facing error message should be localized and sent in the
- # {::Google::Rpc::Status#details google.rpc.Status.details} field, or localized by the client.
+ # {::Google::Rpc::Status#details google.rpc.Status.details} field, or localized
+ # by the client.
  # @!attribute [rw] details
  # @return [::Array<::Google::Protobuf::Any>]
  # A list of messages that carry the error details. There is a common set of
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-speech-v2
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.3.0
  platform: ruby
  authors:
  - Google LLC
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-10-03 00:00:00.000000000 Z
+ date: 2023-03-23 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.12'
+ version: 0.18.0
  - - "<"
  - !ruby/object:Gem::Version
  version: 2.a
@@ -26,7 +26,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.12'
+ version: 0.18.0
  - - "<"
  - !ruby/object:Gem::Version
  version: 2.a
@@ -44,20 +44,40 @@ dependencies:
  - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
+ - !ruby/object:Gem::Dependency
+ name: google-cloud-location
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0.4'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 2.a
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0.4'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 2.a
  - !ruby/object:Gem::Dependency
  name: google-style
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.26.1
+ version: 1.26.3
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.26.1
+ version: 1.26.3
  - !ruby/object:Gem::Dependency
  name: minitest
  requirement: !ruby/object:Gem::Requirement
@@ -175,16 +195,24 @@ files:
  - README.md
  - lib/google-cloud-speech-v2.rb
  - lib/google/cloud/speech/v2.rb
+ - lib/google/cloud/speech/v2/bindings_override.rb
  - lib/google/cloud/speech/v2/cloud_speech_pb.rb
  - lib/google/cloud/speech/v2/cloud_speech_services_pb.rb
+ - lib/google/cloud/speech/v2/rest.rb
  - lib/google/cloud/speech/v2/speech.rb
  - lib/google/cloud/speech/v2/speech/client.rb
  - lib/google/cloud/speech/v2/speech/credentials.rb
  - lib/google/cloud/speech/v2/speech/operations.rb
  - lib/google/cloud/speech/v2/speech/paths.rb
+ - lib/google/cloud/speech/v2/speech/rest.rb
+ - lib/google/cloud/speech/v2/speech/rest/client.rb
+ - lib/google/cloud/speech/v2/speech/rest/operations.rb
+ - lib/google/cloud/speech/v2/speech/rest/service_stub.rb
  - lib/google/cloud/speech/v2/version.rb
  - proto_docs/README.md
+ - proto_docs/google/api/client.rb
  - proto_docs/google/api/field_behavior.rb
+ - proto_docs/google/api/launch_stage.rb
  - proto_docs/google/api/resource.rb
  - proto_docs/google/cloud/speech/v2/cloud_speech.rb
  - proto_docs/google/longrunning/operations.rb
@@ -213,8 +241,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.3.14
+ rubygems_version: 3.4.2
  signing_key:
  specification_version: 4
- summary: API Client library for the Cloud Speech-to-Text V2 API
+ summary: Converts audio to text by applying powerful neural network models.
  test_files: []