google-cloud-speech-v1 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b8d30c86908298f03ad30a9bac371535967be78d2d8876192b4e95a24c60b3a
4
- data.tar.gz: 390bb4dedb2679807486df2974f614e66e8ffb1b82d11e7151cd78fd814533a7
3
+ metadata.gz: 5c3b3e6ee54f4e2bba948193c467882bbf4e47dd69ceac3bbe160cf204db2a9a
4
+ data.tar.gz: 55978252b10819ed6cfc5508227fecfa57ba4f761af83c4f4ec54cf12832451c
5
5
  SHA512:
6
- metadata.gz: bd36ba2e48c2ad07491faa766d9dc4ffc71db8f15f903044feb13bcb137906d7322ab0c10da91c041c0c73d54508a2f80b4942c4df06d8f93889733106560bcd
7
- data.tar.gz: 34cf9fa2aa0549dccc6337151299fb93bdff1ea6069972af69abca72ac2d1002209a967405fddb85ea30ed8dcf852baefed22e83ecfc698c0882e3ec1e5b935b
6
+ metadata.gz: b64bf3e6385c2f25add2efbfd16be36e7eaaa8f4c4d2078ad3022f9982d6574ac7e6387272c234033980524e8ff6b103ea8db9cf5280b6188105b9f69e5140c3
7
+ data.tar.gz: 7d0c1743806c0aa83f873773dae32393d63a921b7a90a0beeeab241bb3ec60a7b5350d154b3cebe8e67af125b219b4519bd6fdefacd4b7fa4aba368d90dbd102
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Ruby Client for the Cloud Speech-to-Text V1 API
2
2
 
3
- API Client library for the Cloud Speech-to-Text V1 API
3
+ Converts audio to text by applying powerful neural network models.
4
4
 
5
5
  Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.
6
6
 
@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
39
39
  optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
40
40
  optional :single_utterance, :bool, 2
41
41
  optional :interim_results, :bool, 3
42
+ optional :enable_voice_activity_events, :bool, 5
43
+ optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout"
44
+ end
45
+ add_message "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout" do
46
+ optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
47
+ optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
42
48
  end
43
49
  add_message "google.cloud.speech.v1.RecognitionConfig" do
44
50
  optional :encoding, :enum, 1, "google.cloud.speech.v1.RecognitionConfig.AudioEncoding"
@@ -153,6 +159,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
153
159
  optional :error, :message, 1, "google.rpc.Status"
154
160
  repeated :results, :message, 2, "google.cloud.speech.v1.StreamingRecognitionResult"
155
161
  optional :speech_event_type, :enum, 4, "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType"
162
+ optional :speech_event_time, :message, 8, "google.protobuf.Duration"
156
163
  optional :total_billed_time, :message, 5, "google.protobuf.Duration"
157
164
  optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1.SpeechAdaptationInfo"
158
165
  optional :request_id, :int64, 10
@@ -160,6 +167,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
160
167
  add_enum "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType" do
161
168
  value :SPEECH_EVENT_UNSPECIFIED, 0
162
169
  value :END_OF_SINGLE_UTTERANCE, 1
170
+ value :SPEECH_ACTIVITY_BEGIN, 2
171
+ value :SPEECH_ACTIVITY_END, 3
172
+ value :SPEECH_ACTIVITY_TIMEOUT, 4
163
173
  end
164
174
  add_message "google.cloud.speech.v1.StreamingRecognitionResult" do
165
175
  repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
@@ -203,6 +213,7 @@ module Google
203
213
  TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.TranscriptOutputConfig").msgclass
204
214
  StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeRequest").msgclass
205
215
  StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
216
+ StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
206
217
  RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
207
218
  RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
208
219
  SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeakerDiarizationConfig").msgclass
@@ -21,7 +21,7 @@ module Google
21
21
  module Cloud
22
22
  module Speech
23
23
  module V1
24
- VERSION = "0.11.0"
24
+ VERSION = "0.12.0"
25
25
  end
26
26
  end
27
27
  end
@@ -122,9 +122,30 @@ module Google
122
122
  # returned as they become available (these interim results are indicated with
123
123
  # the `is_final=false` flag).
124
124
  # If `false` or omitted, only `is_final=true` result(s) are returned.
125
+ # @!attribute [rw] enable_voice_activity_events
126
+ # @return [::Boolean]
127
+ # If `true`, responses with voice activity speech events will be returned as
128
+ # they are detected.
129
+ # @!attribute [rw] voice_activity_timeout
130
+ # @return [::Google::Cloud::Speech::V1::StreamingRecognitionConfig::VoiceActivityTimeout]
131
+ # If set, the server will automatically close the stream after the specified
132
+ # duration has elapsed after the last VOICE_ACTIVITY speech event has been
133
+ # sent. The field `enable_voice_activity_events` must also be set to true.
125
134
  class StreamingRecognitionConfig
126
135
  include ::Google::Protobuf::MessageExts
127
136
  extend ::Google::Protobuf::MessageExts::ClassMethods
137
+
138
+ # Events that a timeout can be set on for voice activity.
139
+ # @!attribute [rw] speech_start_timeout
140
+ # @return [::Google::Protobuf::Duration]
141
+ # Duration to timeout the stream if no speech begins.
142
+ # @!attribute [rw] speech_end_timeout
143
+ # @return [::Google::Protobuf::Duration]
144
+ # Duration to timeout the stream after speech ends.
145
+ class VoiceActivityTimeout
146
+ include ::Google::Protobuf::MessageExts
147
+ extend ::Google::Protobuf::MessageExts::ClassMethods
148
+ end
128
149
  end
129
150
 
130
151
  # Provides information to the recognizer that specifies how to process the
@@ -133,7 +154,8 @@ module Google
133
154
  # @return [::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding]
134
155
  # Encoding of audio data sent in all `RecognitionAudio` messages.
135
156
  # This field is optional for `FLAC` and `WAV` audio files and required
136
- # for all other audio formats. For details, see {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
157
+ # for all other audio formats. For details, see
158
+ # {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
137
159
  # @!attribute [rw] sample_rate_hertz
138
160
  # @return [::Integer]
139
161
  # Sample rate in Hertz of the audio data sent in all
@@ -142,7 +164,8 @@ module Google
142
164
  # source to 16000 Hz. If that's not possible, use the native sample rate of
143
165
  # the audio source (instead of re-sampling).
144
166
  # This field is optional for FLAC and WAV audio files, but is
145
- # required for all other audio formats. For details, see {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
167
+ # required for all other audio formats. For details, see
168
+ # {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
146
169
  # @!attribute [rw] audio_channel_count
147
170
  # @return [::Integer]
148
171
  # The number of channels in the input audio data.
@@ -346,7 +369,8 @@ module Google
346
369
  # an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
347
370
  # encoding configuration must match the encoding described in the audio
348
371
  # header; otherwise the request returns an
349
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
372
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
373
+ # code.
350
374
  module AudioEncoding
351
375
  # Not specified.
352
376
  ENCODING_UNSPECIFIED = 0
@@ -585,8 +609,8 @@ module Google
585
609
 
586
610
  # Contains audio data in the encoding specified in the `RecognitionConfig`.
587
611
  # Either `content` or `uri` must be supplied. Supplying both or neither
588
- # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
589
- # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
612
+ # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
613
+ # See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
590
614
  # @!attribute [rw] content
591
615
  # @return [::String]
592
616
  # The audio data bytes encoded as specified in
@@ -599,8 +623,9 @@ module Google
599
623
  # Currently, only Google Cloud Storage URIs are
600
624
  # supported, which must be specified in the following format:
601
625
  # `gs://bucket_name/object_name` (other URI formats return
602
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
603
- # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
626
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
627
+ # For more information, see [Request
628
+ # URIs](https://cloud.google.com/storage/docs/reference-uris).
604
629
  class RecognitionAudio
605
630
  include ::Google::Protobuf::MessageExts
606
631
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -673,8 +698,8 @@ module Google
673
698
  # Time of the most recent processing update.
674
699
  # @!attribute [r] uri
675
700
  # @return [::String]
676
- # Output only. The URI of the audio file being transcribed. Empty if the audio was sent
677
- # as byte content.
701
+ # Output only. The URI of the audio file being transcribed. Empty if the
702
+ # audio was sent as byte content.
678
703
  class LongRunningRecognizeMetadata
679
704
  include ::Google::Protobuf::MessageExts
680
705
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -742,6 +767,9 @@ module Google
742
767
  # @!attribute [rw] speech_event_type
743
768
  # @return [::Google::Cloud::Speech::V1::StreamingRecognizeResponse::SpeechEventType]
744
769
  # Indicates the type of speech event.
770
+ # @!attribute [rw] speech_event_time
771
+ # @return [::Google::Protobuf::Duration]
772
+ # Time offset between the beginning of the audio and event emission.
745
773
  # @!attribute [rw] total_billed_time
746
774
  # @return [::Google::Protobuf::Duration]
747
775
  # When available, billed audio seconds for the stream.
@@ -770,6 +798,23 @@ module Google
770
798
  # until the server closes the gRPC connection. This event is only sent if
771
799
  # `single_utterance` was set to `true`, and is not used otherwise.
772
800
  END_OF_SINGLE_UTTERANCE = 1
801
+
802
+ # This event indicates that the server has detected the beginning of human
803
+ # voice activity in the stream. This event can be returned multiple times
804
+ # if speech starts and stops repeatedly throughout the stream. This event
805
+ # is only sent if `enable_voice_activity_events` is set to true.
806
+ SPEECH_ACTIVITY_BEGIN = 2
807
+
808
+ # This event indicates that the server has detected the end of human voice
809
+ # activity in the stream. This event can be returned multiple times if
810
+ # speech starts and stops repeatedly throughout the stream. This event is
811
+ # only sent if `enable_voice_activity_events` is set to true.
812
+ SPEECH_ACTIVITY_END = 3
813
+
814
+ # This event indicates that the user-set timeout for speech activity begin
815
+ # or end has been exceeded. Upon receiving this event, the client is expected to
816
+ # send a half close. Further audio will not be processed.
817
+ SPEECH_ACTIVITY_TIMEOUT = 4
773
818
  end
774
819
  end
775
820
 
@@ -806,9 +851,9 @@ module Google
806
851
  # For audio_channel_count = N, its output values can range from '1' to 'N'.
807
852
  # @!attribute [r] language_code
808
853
  # @return [::String]
809
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
810
- # of the language in this result. This language code was detected to have
811
- # the most likelihood of being spoken in the audio.
854
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
855
+ # language tag of the language in this result. This language code was
856
+ # detected to have the most likelihood of being spoken in the audio.
812
857
  class StreamingRecognitionResult
813
858
  include ::Google::Protobuf::MessageExts
814
859
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -832,9 +877,9 @@ module Google
832
877
  # beginning of the audio.
833
878
  # @!attribute [r] language_code
834
879
  # @return [::String]
835
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
836
- # of the language in this result. This language code was detected to have
837
- # the most likelihood of being spoken in the audio.
880
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
881
+ # language tag of the language in this result. This language code was
882
+ # detected to have the most likelihood of being spoken in the audio.
838
883
  class SpeechRecognitionResult
839
884
  include ::Google::Protobuf::MessageExts
840
885
  extend ::Google::Protobuf::MessageExts::ClassMethods
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-speech-v1
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Google LLC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-23 00:00:00.000000000 Z
11
+ date: 2023-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.17.1
19
+ version: 0.18.0
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
22
  version: 2.a
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- version: 0.17.1
29
+ version: 0.18.0
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: 2.a
@@ -236,5 +236,5 @@ requirements: []
236
236
  rubygems_version: 3.4.2
237
237
  signing_key:
238
238
  specification_version: 4
239
- summary: API Client library for the Cloud Speech-to-Text V1 API
239
+ summary: Converts audio to text by applying powerful neural network models.
240
240
  test_files: []