google-cloud-speech-v1p1beta1 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5f8a422d44657d200973215f7be62dc602e85670f696b63a5f746098bf95363
4
- data.tar.gz: 66ffa2a37d29b3a6397e7eacbd45313c540b9c897bd1f28e1416bb2db44e8a9a
3
+ metadata.gz: 3fb51328890358e72c887d4c047bbe5380b02f67a57e19a315a824f35fedf151
4
+ data.tar.gz: 76ad18cf2a0031d1d491e791fd84f45cc67f84abb59361a3366a116dc640b4c3
5
5
  SHA512:
6
- metadata.gz: 8c653961c30391c40cb6cc1e3c84a503d9de996a3b78a8cb5fe09e4736493d26cf8c6fef6bb9c31f32ce84fd20954bf31fddd295923350f7f553c7df0542b7af
7
- data.tar.gz: f0272fb389329bcfcd9b83c2db707057dd7d5adcfd018e9889b3e391e7297c5f5beb51030c1170eec261f3746e64d7f18b5c27b673e762e2c6e7bde606f68754
6
+ metadata.gz: b59f26dd15aa099a0e4b048a90e44d2a7bd4d745a10c047939508923c0ef515172378efbc0b75fff35c9f74603ed8ee585a6b7e659e5a2c898260a622ee32385
7
+ data.tar.gz: 0420ff684d70303bf3f1fdfe38bb31ae944807c0441215d507b6e6a4fa027032a2d5b2c5a582cc51db7f659e9d87167ad53668b012d7ec55b3720a20767a8402
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Ruby Client for the Cloud Speech-to-Text V1p1beta1 API
2
2
 
3
- API Client library for the Cloud Speech-to-Text V1p1beta1 API
3
+ Converts audio to text by applying powerful neural network models.
4
4
 
5
5
  Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.
6
6
 
@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
39
39
  optional :config, :message, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig"
40
40
  optional :single_utterance, :bool, 2
41
41
  optional :interim_results, :bool, 3
42
+ optional :enable_voice_activity_events, :bool, 5
43
+ optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout"
44
+ end
45
+ add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout" do
46
+ optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
47
+ optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
42
48
  end
43
49
  add_message "google.cloud.speech.v1p1beta1.RecognitionConfig" do
44
50
  optional :encoding, :enum, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding"
@@ -159,6 +165,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
159
165
  optional :error, :message, 1, "google.rpc.Status"
160
166
  repeated :results, :message, 2, "google.cloud.speech.v1p1beta1.StreamingRecognitionResult"
161
167
  optional :speech_event_type, :enum, 4, "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType"
168
+ optional :speech_event_time, :message, 8, "google.protobuf.Duration"
162
169
  optional :total_billed_time, :message, 5, "google.protobuf.Duration"
163
170
  optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1p1beta1.SpeechAdaptationInfo"
164
171
  optional :request_id, :int64, 10
@@ -166,6 +173,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
166
173
  add_enum "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType" do
167
174
  value :SPEECH_EVENT_UNSPECIFIED, 0
168
175
  value :END_OF_SINGLE_UTTERANCE, 1
176
+ value :SPEECH_ACTIVITY_BEGIN, 2
177
+ value :SPEECH_ACTIVITY_END, 3
178
+ value :SPEECH_ACTIVITY_TIMEOUT, 4
169
179
  end
170
180
  add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionResult" do
171
181
  repeated :alternatives, :message, 1, "google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative"
@@ -209,6 +219,7 @@ module Google
209
219
  TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.TranscriptOutputConfig").msgclass
210
220
  StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognizeRequest").msgclass
211
221
  StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionConfig").msgclass
222
+ StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
212
223
  RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig").msgclass
213
224
  RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding").enummodule
214
225
  SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.SpeakerDiarizationConfig").msgclass
@@ -21,7 +21,7 @@ module Google
21
21
  module Cloud
22
22
  module Speech
23
23
  module V1p1beta1
24
- VERSION = "0.16.0"
24
+ VERSION = "0.17.0"
25
25
  end
26
26
  end
27
27
  end
@@ -122,9 +122,30 @@ module Google
122
122
  # returned as they become available (these interim results are indicated with
123
123
  # the `is_final=false` flag).
124
124
  # If `false` or omitted, only `is_final=true` result(s) are returned.
125
+ # @!attribute [rw] enable_voice_activity_events
126
+ # @return [::Boolean]
127
+ # If `true`, responses with voice activity speech events will be returned as
128
+ # they are detected.
129
+ # @!attribute [rw] voice_activity_timeout
130
+ # @return [::Google::Cloud::Speech::V1p1beta1::StreamingRecognitionConfig::VoiceActivityTimeout]
131
+ # If set, the server will automatically close the stream after the specified
132
+ # duration has elapsed after the last VOICE_ACTIVITY speech event has been
133
+ # sent. The field `enable_voice_activity_events` must also be set to true.
125
134
  class StreamingRecognitionConfig
126
135
  include ::Google::Protobuf::MessageExts
127
136
  extend ::Google::Protobuf::MessageExts::ClassMethods
137
+
138
+ # Events that a timeout can be set on for voice activity.
139
+ # @!attribute [rw] speech_start_timeout
140
+ # @return [::Google::Protobuf::Duration]
141
+ # Duration to timeout the stream if no speech begins.
142
+ # @!attribute [rw] speech_end_timeout
143
+ # @return [::Google::Protobuf::Duration]
144
+ # Duration to timeout the stream after speech ends.
145
+ class VoiceActivityTimeout
146
+ include ::Google::Protobuf::MessageExts
147
+ extend ::Google::Protobuf::MessageExts::ClassMethods
148
+ end
128
149
  end
129
150
 
130
151
  # Provides information to the recognizer that specifies how to process the
@@ -133,7 +154,8 @@ module Google
133
154
  # @return [::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding]
134
155
  # Encoding of audio data sent in all `RecognitionAudio` messages.
135
156
  # This field is optional for `FLAC` and `WAV` audio files and required
136
- # for all other audio formats. For details, see {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
157
+ # for all other audio formats. For details, see
158
+ # {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
137
159
  # @!attribute [rw] sample_rate_hertz
138
160
  # @return [::Integer]
139
161
  # Sample rate in Hertz of the audio data sent in all
@@ -142,7 +164,8 @@ module Google
142
164
  # source to 16000 Hz. If that's not possible, use the native sample rate of
143
165
  # the audio source (instead of re-sampling).
144
166
  # This field is optional for FLAC and WAV audio files, but is
145
- # required for all other audio formats. For details, see {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
167
+ # required for all other audio formats. For details, see
168
+ # {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
146
169
  # @!attribute [rw] audio_channel_count
147
170
  # @return [::Integer]
148
171
  # The number of channels in the input audio data.
@@ -363,7 +386,8 @@ module Google
363
386
  # an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
364
387
  # encoding configuration must match the encoding described in the audio
365
388
  # header; otherwise the request returns an
366
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
389
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
390
+ # code.
367
391
  module AudioEncoding
368
392
  # Not specified.
369
393
  ENCODING_UNSPECIFIED = 0
@@ -612,8 +636,8 @@ module Google
612
636
 
613
637
  # Contains audio data in the encoding specified in the `RecognitionConfig`.
614
638
  # Either `content` or `uri` must be supplied. Supplying both or neither
615
- # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
616
- # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
639
+ # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
640
+ # See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
617
641
  # @!attribute [rw] content
618
642
  # @return [::String]
619
643
  # The audio data bytes encoded as specified in
@@ -626,8 +650,9 @@ module Google
626
650
  # Currently, only Google Cloud Storage URIs are
627
651
  # supported, which must be specified in the following format:
628
652
  # `gs://bucket_name/object_name` (other URI formats return
629
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
630
- # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
653
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
654
+ # For more information, see [Request
655
+ # URIs](https://cloud.google.com/storage/docs/reference-uris).
631
656
  class RecognitionAudio
632
657
  include ::Google::Protobuf::MessageExts
633
658
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -700,11 +725,12 @@ module Google
700
725
  # Time of the most recent processing update.
701
726
  # @!attribute [r] uri
702
727
  # @return [::String]
703
- # Output only. The URI of the audio file being transcribed. Empty if the audio was sent
704
- # as byte content.
728
+ # Output only. The URI of the audio file being transcribed. Empty if the
729
+ # audio was sent as byte content.
705
730
  # @!attribute [r] output_config
706
731
  # @return [::Google::Cloud::Speech::V1p1beta1::TranscriptOutputConfig]
707
- # Output only. A copy of the TranscriptOutputConfig if it was set in the request.
732
+ # Output only. A copy of the TranscriptOutputConfig if it was set in the
733
+ # request.
708
734
  class LongRunningRecognizeMetadata
709
735
  include ::Google::Protobuf::MessageExts
710
736
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -772,6 +798,9 @@ module Google
772
798
  # @!attribute [rw] speech_event_type
773
799
  # @return [::Google::Cloud::Speech::V1p1beta1::StreamingRecognizeResponse::SpeechEventType]
774
800
  # Indicates the type of speech event.
801
+ # @!attribute [rw] speech_event_time
802
+ # @return [::Google::Protobuf::Duration]
803
+ # Time offset between the beginning of the audio and event emission.
775
804
  # @!attribute [rw] total_billed_time
776
805
  # @return [::Google::Protobuf::Duration]
777
806
  # When available, billed audio seconds for the stream.
@@ -800,6 +829,23 @@ module Google
800
829
  # until the server closes the gRPC connection. This event is only sent if
801
830
  # `single_utterance` was set to `true`, and is not used otherwise.
802
831
  END_OF_SINGLE_UTTERANCE = 1
832
+
833
+ # This event indicates that the server has detected the beginning of human
834
+ # voice activity in the stream. This event can be returned multiple times
835
+ # if speech starts and stops repeatedly throughout the stream. This event
836
+ # is only sent if `enable_voice_activity_events` is set to true.
837
+ SPEECH_ACTIVITY_BEGIN = 2
838
+
839
+ # This event indicates that the server has detected the end of human voice
840
+ # activity in the stream. This event can be returned multiple times if
841
+ # speech starts and stops repeatedly throughout the stream. This event is
842
+ # only sent if `enable_voice_activity_events` is set to true.
843
+ SPEECH_ACTIVITY_END = 3
844
+
845
+ # This event indicates that the user-set timeout for speech activity begin
846
+ # or end has been exceeded. Upon receiving this event, the client is expected to
847
+ # send a half close. Further audio will not be processed.
848
+ SPEECH_ACTIVITY_TIMEOUT = 4
803
849
  end
804
850
  end
805
851
 
@@ -836,9 +882,9 @@ module Google
836
882
  # For audio_channel_count = N, its output values can range from '1' to 'N'.
837
883
  # @!attribute [r] language_code
838
884
  # @return [::String]
839
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
840
- # of the language in this result. This language code was detected to have
841
- # the most likelihood of being spoken in the audio.
885
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
886
+ # language tag of the language in this result. This language code was
887
+ # detected to have the most likelihood of being spoken in the audio.
842
888
  class StreamingRecognitionResult
843
889
  include ::Google::Protobuf::MessageExts
844
890
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -862,9 +908,9 @@ module Google
862
908
  # beginning of the audio.
863
909
  # @!attribute [r] language_code
864
910
  # @return [::String]
865
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
866
- # of the language in this result. This language code was detected to have
867
- # the most likelihood of being spoken in the audio.
911
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
912
+ # language tag of the language in this result. This language code was
913
+ # detected to have the most likelihood of being spoken in the audio.
868
914
  class SpeechRecognitionResult
869
915
  include ::Google::Protobuf::MessageExts
870
916
  extend ::Google::Protobuf::MessageExts::ClassMethods
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-speech-v1p1beta1
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Google LLC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-23 00:00:00.000000000 Z
11
+ date: 2023-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.17.1
19
+ version: 0.18.0
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
22
  version: 2.a
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- version: 0.17.1
29
+ version: 0.18.0
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: 2.a
@@ -236,5 +236,5 @@ requirements: []
236
236
  rubygems_version: 3.4.2
237
237
  signing_key:
238
238
  specification_version: 4
239
- summary: API Client library for the Cloud Speech-to-Text V1p1beta1 API
239
+ summary: Converts audio to text by applying powerful neural network models.
240
240
  test_files: []