google-cloud-speech-v1p1beta1 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5f8a422d44657d200973215f7be62dc602e85670f696b63a5f746098bf95363
4
- data.tar.gz: 66ffa2a37d29b3a6397e7eacbd45313c540b9c897bd1f28e1416bb2db44e8a9a
3
+ metadata.gz: 3fb51328890358e72c887d4c047bbe5380b02f67a57e19a315a824f35fedf151
4
+ data.tar.gz: 76ad18cf2a0031d1d491e791fd84f45cc67f84abb59361a3366a116dc640b4c3
5
5
  SHA512:
6
- metadata.gz: 8c653961c30391c40cb6cc1e3c84a503d9de996a3b78a8cb5fe09e4736493d26cf8c6fef6bb9c31f32ce84fd20954bf31fddd295923350f7f553c7df0542b7af
7
- data.tar.gz: f0272fb389329bcfcd9b83c2db707057dd7d5adcfd018e9889b3e391e7297c5f5beb51030c1170eec261f3746e64d7f18b5c27b673e762e2c6e7bde606f68754
6
+ metadata.gz: b59f26dd15aa099a0e4b048a90e44d2a7bd4d745a10c047939508923c0ef515172378efbc0b75fff35c9f74603ed8ee585a6b7e659e5a2c898260a622ee32385
7
+ data.tar.gz: 0420ff684d70303bf3f1fdfe38bb31ae944807c0441215d507b6e6a4fa027032a2d5b2c5a582cc51db7f659e9d87167ad53668b012d7ec55b3720a20767a8402
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Ruby Client for the Cloud Speech-to-Text V1p1beta1 API
2
2
 
3
- API Client library for the Cloud Speech-to-Text V1p1beta1 API
3
+ Converts audio to text by applying powerful neural network models.
4
4
 
5
5
  Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.
6
6
 
@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
39
39
  optional :config, :message, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig"
40
40
  optional :single_utterance, :bool, 2
41
41
  optional :interim_results, :bool, 3
42
+ optional :enable_voice_activity_events, :bool, 5
43
+ optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout"
44
+ end
45
+ add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout" do
46
+ optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
47
+ optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
42
48
  end
43
49
  add_message "google.cloud.speech.v1p1beta1.RecognitionConfig" do
44
50
  optional :encoding, :enum, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding"
@@ -159,6 +165,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
159
165
  optional :error, :message, 1, "google.rpc.Status"
160
166
  repeated :results, :message, 2, "google.cloud.speech.v1p1beta1.StreamingRecognitionResult"
161
167
  optional :speech_event_type, :enum, 4, "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType"
168
+ optional :speech_event_time, :message, 8, "google.protobuf.Duration"
162
169
  optional :total_billed_time, :message, 5, "google.protobuf.Duration"
163
170
  optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1p1beta1.SpeechAdaptationInfo"
164
171
  optional :request_id, :int64, 10
@@ -166,6 +173,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
166
173
  add_enum "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType" do
167
174
  value :SPEECH_EVENT_UNSPECIFIED, 0
168
175
  value :END_OF_SINGLE_UTTERANCE, 1
176
+ value :SPEECH_ACTIVITY_BEGIN, 2
177
+ value :SPEECH_ACTIVITY_END, 3
178
+ value :SPEECH_ACTIVITY_TIMEOUT, 4
169
179
  end
170
180
  add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionResult" do
171
181
  repeated :alternatives, :message, 1, "google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative"
@@ -209,6 +219,7 @@ module Google
209
219
  TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.TranscriptOutputConfig").msgclass
210
220
  StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognizeRequest").msgclass
211
221
  StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionConfig").msgclass
222
+ StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
212
223
  RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig").msgclass
213
224
  RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding").enummodule
214
225
  SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.SpeakerDiarizationConfig").msgclass
@@ -21,7 +21,7 @@ module Google
21
21
  module Cloud
22
22
  module Speech
23
23
  module V1p1beta1
24
- VERSION = "0.16.0"
24
+ VERSION = "0.17.0"
25
25
  end
26
26
  end
27
27
  end
@@ -122,9 +122,30 @@ module Google
122
122
  # returned as they become available (these interim results are indicated with
123
123
  # the `is_final=false` flag).
124
124
  # If `false` or omitted, only `is_final=true` result(s) are returned.
125
+ # @!attribute [rw] enable_voice_activity_events
126
+ # @return [::Boolean]
127
+ # If `true`, responses with voice activity speech events will be returned as
128
+ # they are detected.
129
+ # @!attribute [rw] voice_activity_timeout
130
+ # @return [::Google::Cloud::Speech::V1p1beta1::StreamingRecognitionConfig::VoiceActivityTimeout]
131
+ # If set, the server will automatically close the stream after the specified
132
+ # duration has elapsed after the last VOICE_ACTIVITY speech event has been
133
+ # sent. The field `enable_voice_activity_events` must also be set to true.
125
134
  class StreamingRecognitionConfig
126
135
  include ::Google::Protobuf::MessageExts
127
136
  extend ::Google::Protobuf::MessageExts::ClassMethods
137
+
138
+ # Events that a timeout can be set on for voice activity.
139
+ # @!attribute [rw] speech_start_timeout
140
+ # @return [::Google::Protobuf::Duration]
141
+ # Duration to timeout the stream if no speech begins.
142
+ # @!attribute [rw] speech_end_timeout
143
+ # @return [::Google::Protobuf::Duration]
144
+ # Duration to timeout the stream after speech ends.
145
+ class VoiceActivityTimeout
146
+ include ::Google::Protobuf::MessageExts
147
+ extend ::Google::Protobuf::MessageExts::ClassMethods
148
+ end
128
149
  end
129
150
 
130
151
  # Provides information to the recognizer that specifies how to process the
@@ -133,7 +154,8 @@ module Google
133
154
  # @return [::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding]
134
155
  # Encoding of audio data sent in all `RecognitionAudio` messages.
135
156
  # This field is optional for `FLAC` and `WAV` audio files and required
136
- # for all other audio formats. For details, see {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
157
+ # for all other audio formats. For details, see
158
+ # {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
137
159
  # @!attribute [rw] sample_rate_hertz
138
160
  # @return [::Integer]
139
161
  # Sample rate in Hertz of the audio data sent in all
@@ -142,7 +164,8 @@ module Google
142
164
  # source to 16000 Hz. If that's not possible, use the native sample rate of
143
165
  # the audio source (instead of re-sampling).
144
166
  # This field is optional for FLAC and WAV audio files, but is
145
- # required for all other audio formats. For details, see {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
167
+ # required for all other audio formats. For details, see
168
+ # {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
146
169
  # @!attribute [rw] audio_channel_count
147
170
  # @return [::Integer]
148
171
  # The number of channels in the input audio data.
@@ -363,7 +386,8 @@ module Google
363
386
  # an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
364
387
  # encoding configuration must match the encoding described in the audio
365
388
  # header; otherwise the request returns an
366
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
389
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
390
+ # code.
367
391
  module AudioEncoding
368
392
  # Not specified.
369
393
  ENCODING_UNSPECIFIED = 0
@@ -612,8 +636,8 @@ module Google
612
636
 
613
637
  # Contains audio data in the encoding specified in the `RecognitionConfig`.
614
638
  # Either `content` or `uri` must be supplied. Supplying both or neither
615
- # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
616
- # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
639
+ # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
640
+ # See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
617
641
  # @!attribute [rw] content
618
642
  # @return [::String]
619
643
  # The audio data bytes encoded as specified in
@@ -626,8 +650,9 @@ module Google
626
650
  # Currently, only Google Cloud Storage URIs are
627
651
  # supported, which must be specified in the following format:
628
652
  # `gs://bucket_name/object_name` (other URI formats return
629
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
630
- # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
653
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
654
+ # For more information, see [Request
655
+ # URIs](https://cloud.google.com/storage/docs/reference-uris).
631
656
  class RecognitionAudio
632
657
  include ::Google::Protobuf::MessageExts
633
658
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -700,11 +725,12 @@ module Google
700
725
  # Time of the most recent processing update.
701
726
  # @!attribute [r] uri
702
727
  # @return [::String]
703
- # Output only. The URI of the audio file being transcribed. Empty if the audio was sent
704
- # as byte content.
728
+ # Output only. The URI of the audio file being transcribed. Empty if the
729
+ # audio was sent as byte content.
705
730
  # @!attribute [r] output_config
706
731
  # @return [::Google::Cloud::Speech::V1p1beta1::TranscriptOutputConfig]
707
- # Output only. A copy of the TranscriptOutputConfig if it was set in the request.
732
+ # Output only. A copy of the TranscriptOutputConfig if it was set in the
733
+ # request.
708
734
  class LongRunningRecognizeMetadata
709
735
  include ::Google::Protobuf::MessageExts
710
736
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -772,6 +798,9 @@ module Google
772
798
  # @!attribute [rw] speech_event_type
773
799
  # @return [::Google::Cloud::Speech::V1p1beta1::StreamingRecognizeResponse::SpeechEventType]
774
800
  # Indicates the type of speech event.
801
+ # @!attribute [rw] speech_event_time
802
+ # @return [::Google::Protobuf::Duration]
803
+ # Time offset between the beginning of the audio and event emission.
775
804
  # @!attribute [rw] total_billed_time
776
805
  # @return [::Google::Protobuf::Duration]
777
806
  # When available, billed audio seconds for the stream.
@@ -800,6 +829,23 @@ module Google
800
829
  # until the server closes the gRPC connection. This event is only sent if
801
830
  # `single_utterance` was set to `true`, and is not used otherwise.
802
831
  END_OF_SINGLE_UTTERANCE = 1
832
+
833
+ # This event indicates that the server has detected the beginning of human
834
+ # voice activity in the stream. This event can be returned multiple times
835
+ # if speech starts and stops repeatedly throughout the stream. This event
836
+ # is only sent if `enable_voice_activity_events` is set to true.
837
+ SPEECH_ACTIVITY_BEGIN = 2
838
+
839
+ # This event indicates that the server has detected the end of human voice
840
+ # activity in the stream. This event can be returned multiple times if
841
+ # speech starts and stops repeatedly throughout the stream. This event is
842
+ # only sent if `enable_voice_activity_events` is set to true.
843
+ SPEECH_ACTIVITY_END = 3
844
+
845
+ # This event indicates that the user-set timeout for speech activity begin
846
+ # or end has been exceeded. Upon receiving this event, the client is expected to
847
+ # send a half close. Further audio will not be processed.
848
+ SPEECH_ACTIVITY_TIMEOUT = 4
803
849
  end
804
850
  end
805
851
 
@@ -836,9 +882,9 @@ module Google
836
882
  # For audio_channel_count = N, its output values can range from '1' to 'N'.
837
883
  # @!attribute [r] language_code
838
884
  # @return [::String]
839
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
840
- # of the language in this result. This language code was detected to have
841
- # the most likelihood of being spoken in the audio.
885
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
886
+ # language tag of the language in this result. This language code was
887
+ # detected to have the most likelihood of being spoken in the audio.
842
888
  class StreamingRecognitionResult
843
889
  include ::Google::Protobuf::MessageExts
844
890
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -862,9 +908,9 @@ module Google
862
908
  # beginning of the audio.
863
909
  # @!attribute [r] language_code
864
910
  # @return [::String]
865
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
866
- # of the language in this result. This language code was detected to have
867
- # the most likelihood of being spoken in the audio.
911
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
912
+ # language tag of the language in this result. This language code was
913
+ # detected to have the most likelihood of being spoken in the audio.
868
914
  class SpeechRecognitionResult
869
915
  include ::Google::Protobuf::MessageExts
870
916
  extend ::Google::Protobuf::MessageExts::ClassMethods
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-speech-v1p1beta1
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Google LLC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-23 00:00:00.000000000 Z
11
+ date: 2023-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.17.1
19
+ version: 0.18.0
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
22
  version: 2.a
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- version: 0.17.1
29
+ version: 0.18.0
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: 2.a
@@ -236,5 +236,5 @@ requirements: []
236
236
  rubygems_version: 3.4.2
237
237
  signing_key:
238
238
  specification_version: 4
239
- summary: API Client library for the Cloud Speech-to-Text V1p1beta1 API
239
+ summary: Converts audio to text by applying powerful neural network models.
240
240
  test_files: []