google-cloud-speech-v2 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -275,22 +275,44 @@ module Google
  # When using this model, the service will stop transcribing audio after the
  # first utterance is detected and completed.
  #
- # When using this model,
- # {::Google::Cloud::Speech::V2::RecognitionFeatures::MultiChannelMode::SEPARATE_RECOGNITION_PER_CHANNEL SEPARATE_RECOGNITION_PER_CHANNEL}
- # is not supported; multi-channel audio is accepted, but only the first
- # channel will be processed and transcribed.
+ # When using this model,
+ # {::Google::Cloud::Speech::V2::RecognitionFeatures::MultiChannelMode::SEPARATE_RECOGNITION_PER_CHANNEL SEPARATE_RECOGNITION_PER_CHANNEL}
+ # is not supported; multi-channel audio is accepted, but only the first
+ # channel will be processed and transcribed.
+ #
+ # - `telephony`
+ #
+ # Best for audio that originated from a phone call (typically recorded at
+ # an 8khz sampling rate).
+ #
+ # - `medical_conversation`
+ #
+ # For conversations between a medical provider—for example, a doctor or
+ # nurse—and a patient. Use this model when both a provider and a patient
+ # are speaking. Words uttered by each speaker are automatically detected
+ # and labeled in the returned transcript.
+ #
+ # For supported features please see [medical models
+ # documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
+ #
+ # - `medical_dictation`
+ #
+ # For dictated notes spoken by a single medical provider—for example, a
+ # doctor dictating notes about a patient's blood test results.
+ #
+ # For supported features please see [medical models
+ # documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
+ #
+ # - `usm`
+ #
+ # The next generation of Speech-to-Text models from Google.
  # @!attribute [rw] language_codes
  # @return [::Array<::String>]
  # Required. The language of the supplied audio as a
  # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  #
- # Supported languages:
- #
- # - `en-US`
- #
- # - `en-GB`
- #
- # - `fr-FR`
+ # Supported languages for each model are listed at:
+ # https://cloud.google.com/speech-to-text/docs/languages
  #
  # If additional languages are provided, recognition result will contain
  # recognition in the most likely language detected. The recognition result
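
The model list and language guidance added above belong to the Recognizer resource. A minimal sketch of creating a recognizer that uses the `telephony` model and then transcribing a clip with it; the project ID, recognizer ID, and audio file are placeholders, and the exact field set may vary by gem version:

```ruby
require "google/cloud/speech/v2"

speech = Google::Cloud::Speech::V2::Speech::Client.new

# create_recognizer returns a long-running operation that resolves to the Recognizer.
operation = speech.create_recognizer(
  parent:        "projects/my-project/locations/global",  # placeholder project
  recognizer_id: "telephony-recognizer",                   # placeholder ID
  recognizer:    {
    model:          "telephony",   # one of the models listed above
    language_codes: ["en-US"]      # see the supported-languages page above
  }
)
operation.wait_until_done!
recognizer = operation.response

# Transcribe a short phone-call recording with the new recognizer.
response = speech.recognize(
  recognizer: recognizer.name,
  config:     { auto_decoding_config: {} },  # let the service detect the encoding
  content:    File.binread("call.wav")       # placeholder audio file
)
response.results.each { |result| puts result.alternatives.first&.transcript }
```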
@@ -373,14 +395,23 @@ module Google

  # Automatically detected decoding parameters.
  # Supported for the following encodings:
+ #
  # * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
+ #
  # * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
+ #
  # * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
+ #
  # * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
+ #
  # * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
+ #
  # * FLAC: FLAC frames in the "native FLAC" container format.
+ #
  # * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
+ #
  # * OGG_OPUS: Opus audio frames in an Ogg container.
+ #
  # * WEBM_OPUS: Opus audio frames in a WebM container.
  class AutoDetectDecodingConfig
  include ::Google::Protobuf::MessageExts
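
Since `AutoDetectDecodingConfig` carries no fields, enabling auto-detection is just a matter of populating that branch of the decoding oneof with an empty message. Continuing the sketch above (the file name is a placeholder):

```ruby
# Any of the containers listed above (WAV, FLAC, MP3, Ogg/WebM Opus, AMR) can be
# passed as-is; the service detects the encoding from the file itself.
config = Google::Cloud::Speech::V2::RecognitionConfig.new(
  auto_decoding_config: Google::Cloud::Speech::V2::AutoDetectDecodingConfig.new
)
response = speech.recognize recognizer: recognizer.name,
                            config:     config,
                            content:    File.binread("podcast.mp3")  # placeholder
```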
@@ -398,16 +429,24 @@ module Google
  # sampling rate of the audio source to 16000 Hz. If that's not possible, use
  # the native sample rate of the audio source (instead of re-sampling).
  # Supported for the following encodings:
+ #
  # * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
+ #
  # * MULAW: Headerless 8-bit companded mulaw samples.
+ #
  # * ALAW: Headerless 8-bit companded alaw samples.
  # @!attribute [rw] audio_channel_count
  # @return [::Integer]
  # Number of channels present in the audio data sent for recognition.
  # Supported for the following encodings:
+ #
  # * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
+ #
  # * MULAW: Headerless 8-bit companded mulaw samples.
+ #
  # * ALAW: Headerless 8-bit companded alaw samples.
+ #
+ # The maximum allowed value is 8.
  class ExplicitDecodingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
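
Headerless audio needs `ExplicitDecodingConfig` instead, with the encoding, sample rate, and channel count spelled out. A hedged sketch for single-channel 16 kHz PCM (note the 8-channel ceiling on `audio_channel_count` added above):

```ruby
config = Google::Cloud::Speech::V2::RecognitionConfig.new(
  explicit_decoding_config: {
    encoding:            :LINEAR16,  # headerless 16-bit little-endian PCM
    sample_rate_hertz:   16_000,     # 16000 Hz is the recommended rate
    audio_channel_count: 1           # maximum allowed value is 8
  }
)
```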
@@ -433,7 +472,7 @@ module Google
  # @return [::Integer]
  # Required. Minimum number of speakers in the conversation. This range gives
  # you more flexibility by allowing the system to automatically determine the
- # correct number of speakers. If not set, the default value is 2.
+ # correct number of speakers.
  #
  # To fix the number of speakers detected in the audio, set
  # `min_speaker_count` = `max_speaker_count`.
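
Diarization is requested through `RecognitionFeatures`; as the comment notes, setting `min_speaker_count` equal to `max_speaker_count` pins the speaker count instead of letting the service choose within a range. A sketch:

```ruby
features = Google::Cloud::Speech::V2::RecognitionFeatures.new(
  diarization_config: {
    min_speaker_count: 2,  # set min == max to fix the number of speakers
    max_speaker_count: 2
  }
)
config = { auto_decoding_config: {}, features: features }
```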
@@ -521,28 +560,28 @@ module Google
  end

  # Provides "hints" to the speech recognizer to favor specific words and phrases
- # in the results. Phrase sets can be specified as an inline resource, or a
- # reference to an existing phrase set resource.
+ # in the results. PhraseSets can be specified as an inline resource, or a
+ # reference to an existing PhraseSet resource.
  # @!attribute [rw] phrase_sets
  # @return [::Array<::Google::Cloud::Speech::V2::SpeechAdaptation::AdaptationPhraseSet>]
- # A list of inline or referenced phrase sets.
+ # A list of inline or referenced PhraseSets.
  # @!attribute [rw] custom_classes
  # @return [::Array<::Google::Cloud::Speech::V2::CustomClass>]
- # A list of inline custom classes. Existing custom class resources can be
- # referenced directly in a phrase set.
+ # A list of inline CustomClasses. Existing CustomClass resources can be
+ # referenced directly in a PhraseSet.
  class SpeechAdaptation
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

- # A biasing phrase set, which can be either a string referencing the name of
- # an existing phrase set resource, or an inline definition of a phrase set.
+ # A biasing PhraseSet, which can be either a string referencing the name of
+ # an existing PhraseSets resource, or an inline definition of a PhraseSet.
  # @!attribute [rw] phrase_set
  # @return [::String]
- # The name of an existing phrase set resource. The user must have read
+ # The name of an existing PhraseSet resource. The user must have read
  # access to the resource and it must not be deleted.
  # @!attribute [rw] inline_phrase_set
  # @return [::Google::Cloud::Speech::V2::PhraseSet]
- # An inline defined phrase set.
+ # An inline defined PhraseSet.
  class AdaptationPhraseSet
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
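
An `AdaptationPhraseSet` either names an existing PhraseSet resource or carries one inline, and both forms can be mixed in the same request. A hedged sketch (the resource name and phrase values are placeholders):

```ruby
adaptation = Google::Cloud::Speech::V2::SpeechAdaptation.new(
  phrase_sets: [
    # Reference an existing PhraseSet resource by name (placeholder name).
    { phrase_set: "projects/my-project/locations/global/phraseSets/agent-terms" },
    # Or define one inline for this request only.
    { inline_phrase_set: { phrases: [{ value: "Speech-to-Text", boost: 10.0 }] } }
  ]
)
config = { auto_decoding_config: {}, adaptation: adaptation }
```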
@@ -651,9 +690,9 @@ module Google
  # @!attribute [rw] words
  # @return [::Array<::Google::Cloud::Speech::V2::WordInfo>]
  # A list of word-specific information for each recognized word.
- # When
- # [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
- # is true, you will see all the words from the beginning of the audio.
+ # When the
+ # {::Google::Cloud::Speech::V2::SpeakerDiarizationConfig SpeakerDiarizationConfig}
+ # is set, you will see all the words from the beginning of the audio.
  class SpeechRecognitionAlternative
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -694,8 +733,8 @@ module Google
  # A distinct label is assigned for every speaker within the audio. This field
  # specifies which one of those speakers was detected to have spoken this
  # word. `speaker_label` is set if
- # [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
- # is `true` and only in the top alternative.
+ # {::Google::Cloud::Speech::V2::SpeakerDiarizationConfig SpeakerDiarizationConfig}
+ # is given and only in the top alternative.
  class WordInfo
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
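
When a `SpeakerDiarizationConfig` was supplied (see the diarization sketch earlier), the per-word `speaker_label` values appear only on the top alternative. A sketch of reading them from a recognize response:

```ruby
response.results.each do |result|
  top = result.alternatives.first  # speaker labels appear only in the top alternative
  next unless top
  top.words.each do |word|
    # speaker_label is populated only when diarization was configured.
    puts format("%-10s %s", word.speaker_label, word.word)
  end
end
```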
@@ -795,9 +834,9 @@ module Google
  # of the recognizer during this recognition request. If no mask is provided,
  # all non-default valued fields in
  # {::Google::Cloud::Speech::V2::StreamingRecognitionConfig#config config} override
- # the values in the recognizer for this recognition request. If a mask is
+ # the values in the Recognizer for this recognition request. If a mask is
  # provided, only the fields listed in the mask override the config in the
- # recognizer for this recognition request. If a wildcard (`*`) is provided,
+ # Recognizer for this recognition request. If a wildcard (`*`) is provided,
  # {::Google::Cloud::Speech::V2::StreamingRecognitionConfig#config config}
  # completely overrides and replaces the config in the recognizer for this
  # recognition request.
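
The mask semantics above (no mask: non-default fields override; explicit mask: only the listed fields override; `*`: full replacement) apply per streaming request. A hedged sketch of a streaming config that overrides a single field of the stored Recognizer config:

```ruby
streaming_config = {
  config:      { features: { enable_word_time_offsets: true } },
  # Only the listed path overrides the Recognizer's stored config;
  # use paths: ["*"] to replace the stored config entirely.
  config_mask: { paths: ["features.enable_word_time_offsets"] }
}
```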
@@ -841,6 +880,7 @@ module Google
  # @!attribute [rw] audio
  # @return [::String]
  # Inline audio bytes to be Recognized.
+ # Maximum size for this field is 15 KB per request.
  class StreamingRecognizeRequest
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
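
Because each streaming request may now carry at most 15 KB of audio, the payload has to be sent in chunks after an initial configuration message. A hedged sketch using `Gapic::StreamInput` (the file name is a placeholder; `streaming_config` comes from the sketch above):

```ruby
input  = Gapic::StreamInput.new
stream = speech.streaming_recognize input

# The first request carries the configuration; later ones carry audio only.
input << { recognizer: recognizer.name, streaming_config: streaming_config }

File.open("call.raw", "rb") do |file|
  while (chunk = file.read(15_000))  # keep each chunk under the 15 KB per-request cap
    input << { audio: chunk }
  end
end
input.close

stream.each do |message|
  message.results.each { |result| puts result.alternatives.first&.transcript }
end
```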
@@ -878,17 +918,57 @@ module Google
  # @!attribute [rw] files
  # @return [::Array<::Google::Cloud::Speech::V2::BatchRecognizeFileMetadata>]
  # Audio files with file metadata for ASR.
+ # The maximum number of files allowed to be specified is 5.
+ # @!attribute [rw] recognition_output_config
+ # @return [::Google::Cloud::Speech::V2::RecognitionOutputConfig]
+ # Configuration options for where to output the transcripts of each file.
  class BatchRecognizeRequest
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
  end

+ # Output configurations for Cloud Storage.
+ # @!attribute [rw] uri
+ # @return [::String]
+ # The Cloud Storage URI prefix with which recognition results will be
+ # written.
+ class GcsOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # Output configurations for inline response.
+ class InlineOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
+ # Configuration options for the output(s) of recognition.
+ # @!attribute [rw] gcs_output_config
+ # @return [::Google::Cloud::Speech::V2::GcsOutputConfig]
+ # If this message is populated, recognition results are written to the
+ # provided Google Cloud Storage URI.
+ # @!attribute [rw] inline_response_config
+ # @return [::Google::Cloud::Speech::V2::InlineOutputConfig]
+ # If this message is populated, recognition results are provided in the
+ # {::Google::Cloud::Speech::V2::BatchRecognizeResponse BatchRecognizeResponse}
+ # message of the Operation when completed. This is only supported when
+ # calling {::Google::Cloud::Speech::V2::Speech::Client#batch_recognize BatchRecognize}
+ # with just one audio file.
+ class RecognitionOutputConfig
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
  # Response message for
  # {::Google::Cloud::Speech::V2::Speech::Client#batch_recognize BatchRecognize} that is
  # packaged into a longrunning {::Google::Longrunning::Operation Operation}.
  # @!attribute [rw] results
  # @return [::Google::Protobuf::Map{::String => ::Google::Cloud::Speech::V2::BatchRecognizeFileResult}]
  # Map from filename to the final result for that file.
+ # @!attribute [rw] total_billed_duration
+ # @return [::Google::Protobuf::Duration]
+ # When available, billed audio seconds for the corresponding request.
  class BatchRecognizeResponse
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
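
The two output modes added above are the mutually exclusive branches of `RecognitionOutputConfig`: write transcripts to a Cloud Storage prefix (up to five input files), or return them inline in the operation response (single file only). A hedged sketch of the Cloud Storage variant (bucket and object URIs are placeholders):

```ruby
operation = speech.batch_recognize(
  recognizer: recognizer.name,
  config:     { auto_decoding_config: {} },
  files:      [{ uri: "gs://my-bucket/interview.flac" }],  # at most 5 files
  recognition_output_config: {
    gcs_output_config: { uri: "gs://my-bucket/transcripts/" }  # output prefix
    # For a single file, inline_response_config: {} returns results in the
    # BatchRecognizeResponse instead of writing to Cloud Storage.
  }
)
operation.wait_until_done!
batch_response = operation.response  # BatchRecognizeResponse
puts batch_response.total_billed_duration&.seconds
```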
@@ -903,13 +983,36 @@ module Google
  end
  end

+ # Output type for Cloud Storage of BatchRecognize transcripts. Though this
+ # proto isn't returned in this API anywhere, the Cloud Storage transcripts will
+ # be this proto serialized and should be parsed as such.
+ # @!attribute [rw] results
+ # @return [::Array<::Google::Cloud::Speech::V2::SpeechRecognitionResult>]
+ # Sequential list of transcription results corresponding to sequential
+ # portions of audio.
+ # @!attribute [rw] metadata
+ # @return [::Google::Cloud::Speech::V2::RecognitionResponseMetadata]
+ # Metadata about the recognition.
+ class BatchRecognizeResults
+ include ::Google::Protobuf::MessageExts
+ extend ::Google::Protobuf::MessageExts::ClassMethods
+ end
+
  # Final results for a single file.
  # @!attribute [rw] uri
  # @return [::String]
- # The GCS URI to which recognition results were written.
+ # The Cloud Storage URI to which recognition results were written.
  # @!attribute [rw] error
  # @return [::Google::Rpc::Status]
  # Error if one was encountered.
+ # @!attribute [rw] metadata
+ # @return [::Google::Cloud::Speech::V2::RecognitionResponseMetadata]
+ # @!attribute [rw] transcript
+ # @return [::Google::Cloud::Speech::V2::BatchRecognizeResults]
+ # The transcript for the audio file. This is populated only when
+ # {::Google::Cloud::Speech::V2::InlineOutputConfig InlineOutputConfig} is set in
+ # the
+ # [RecognitionOutputConfig][[google.cloud.speech.v2.RecognitionOutputConfig].
  class BatchRecognizeFileResult
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
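
Reading the results depends on the output mode chosen above: with inline output the `transcript` field of each file result is already a `BatchRecognizeResults` message, while with Cloud Storage output the written object is that same message serialized and can be decoded with the generated protobuf class. A hedged sketch, assuming the stored object was downloaded locally and is JSON-encoded (use `decode` instead for a binary payload):

```ruby
# Inline output: transcripts arrive on each BatchRecognizeFileResult.
batch_response.results.each do |filename, file_result|
  if file_result.error && file_result.error.code != 0
    warn "#{filename} failed: #{file_result.error.message}"
    next
  end
  file_result.transcript&.results&.each do |result|
    puts "#{filename}: #{result.alternatives.first&.transcript}"
  end
end

# Cloud Storage output: parse the written object as BatchRecognizeResults.
json    = File.read("interview_transcript.json")  # placeholder downloaded object
decoded = Google::Cloud::Speech::V2::BatchRecognizeResults.decode_json json
decoded.results.each { |result| puts result.alternatives.first&.transcript }
```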
@@ -925,7 +1028,7 @@ module Google
  # Error if one was encountered.
  # @!attribute [rw] uri
  # @return [::String]
- # The GCS URI to which recognition results will be written.
+ # The Cloud Storage URI to which recognition results will be written.
  class BatchRecognizeTranscriptionMetadata
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -1290,7 +1393,8 @@ module Google
  # phrase will be recognized over other similar sounding phrases. The higher
  # the boost, the higher the chance of false positive recognition as well.
  # Valid `boost` values are between 0 (exclusive) and 20. We recommend using a
- # binary search approach to finding the optimal value for your use case.
+ # binary search approach to finding the optimal value for your use case as
+ # well as adding phrases both with and without boost to your requests.
  # @!attribute [rw] display_name
  # @return [::String]
  # User-settable, human-readable name for the PhraseSet. Must be 63
  # be recognized over other similar sounding phrases. The higher the boost,
  # the higher the chance of false positive recognition as well. Negative
  # boost values would correspond to anti-biasing. Anti-biasing is not
- # enabled, so negative boost will simply be ignored. Though `boost` can
- # accept a wide range of positive values, most use cases are best served
- # with values between 0 and 20. We recommend using a binary search approach
- # to finding the optimal value for your use case. Speech recognition
- # will skip PhraseSets with a boost value of 0.
+ # enabled, so negative boost values will return an error. Boost values must
+ # be between 0 and 20. Any values outside that range will return an error.
+ # We recommend using a binary search approach to finding the optimal value
+ # for your use case as well as adding phrases both with and without boost
+ # to your requests.
  class Phrase
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
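
The tightened boost rules above (values must stay between 0 and 20; anything outside that range is rejected) and the advice to mix boosted and un-boosted phrases can be applied to an inline PhraseSet like this (the phrase values are placeholders):

```ruby
inline_phrase_set = {
  phrases: [
    { value: "Cymbal Labs",         boost: 15.0 },  # keep boost between 0 and 20
    { value: "quarterly earnings" }                  # no boost: plain hint only
  ]
}
config = {
  auto_decoding_config: {},
  adaptation: { phrase_sets: [{ inline_phrase_set: inline_phrase_set }] }
}
```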
@@ -28,12 +28,14 @@ module Google
  # [API Design Guide](https://cloud.google.com/apis/design/errors).
  # @!attribute [rw] code
  # @return [::Integer]
- # The status code, which should be an enum value of [google.rpc.Code][google.rpc.Code].
+ # The status code, which should be an enum value of
+ # [google.rpc.Code][google.rpc.Code].
  # @!attribute [rw] message
  # @return [::String]
  # A developer-facing error message, which should be in English. Any
  # user-facing error message should be localized and sent in the
- # {::Google::Rpc::Status#details google.rpc.Status.details} field, or localized by the client.
+ # {::Google::Rpc::Status#details google.rpc.Status.details} field, or localized
+ # by the client.
  # @!attribute [rw] details
  # @return [::Array<::Google::Protobuf::Any>]
  # A list of messages that carry the error details. There is a common set of
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-speech-v2
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.3.0
  platform: ruby
  authors:
  - Google LLC
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-10-03 00:00:00.000000000 Z
+ date: 2023-03-23 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.12'
+ version: 0.18.0
  - - "<"
  - !ruby/object:Gem::Version
  version: 2.a
@@ -26,7 +26,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0.12'
+ version: 0.18.0
  - - "<"
  - !ruby/object:Gem::Version
  version: 2.a
@@ -44,20 +44,40 @@ dependencies:
  - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
+ - !ruby/object:Gem::Dependency
+ name: google-cloud-location
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0.4'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 2.a
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0.4'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 2.a
  - !ruby/object:Gem::Dependency
  name: google-style
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.26.1
+ version: 1.26.3
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.26.1
+ version: 1.26.3
  - !ruby/object:Gem::Dependency
  name: minitest
  requirement: !ruby/object:Gem::Requirement
@@ -175,16 +195,24 @@ files:
  - README.md
  - lib/google-cloud-speech-v2.rb
  - lib/google/cloud/speech/v2.rb
+ - lib/google/cloud/speech/v2/bindings_override.rb
  - lib/google/cloud/speech/v2/cloud_speech_pb.rb
  - lib/google/cloud/speech/v2/cloud_speech_services_pb.rb
+ - lib/google/cloud/speech/v2/rest.rb
  - lib/google/cloud/speech/v2/speech.rb
  - lib/google/cloud/speech/v2/speech/client.rb
  - lib/google/cloud/speech/v2/speech/credentials.rb
  - lib/google/cloud/speech/v2/speech/operations.rb
  - lib/google/cloud/speech/v2/speech/paths.rb
+ - lib/google/cloud/speech/v2/speech/rest.rb
+ - lib/google/cloud/speech/v2/speech/rest/client.rb
+ - lib/google/cloud/speech/v2/speech/rest/operations.rb
+ - lib/google/cloud/speech/v2/speech/rest/service_stub.rb
  - lib/google/cloud/speech/v2/version.rb
  - proto_docs/README.md
+ - proto_docs/google/api/client.rb
  - proto_docs/google/api/field_behavior.rb
+ - proto_docs/google/api/launch_stage.rb
  - proto_docs/google/api/resource.rb
  - proto_docs/google/cloud/speech/v2/cloud_speech.rb
  - proto_docs/google/longrunning/operations.rb
@@ -213,8 +241,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.3.14
+ rubygems_version: 3.4.2
  signing_key:
  specification_version: 4
- summary: API Client library for the Cloud Speech-to-Text V2 API
+ summary: Converts audio to text by applying powerful neural network models.
  test_files: []