google-cloud-speech-v1 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b8d30c86908298f03ad30a9bac371535967be78d2d8876192b4e95a24c60b3a
4
- data.tar.gz: 390bb4dedb2679807486df2974f614e66e8ffb1b82d11e7151cd78fd814533a7
3
+ metadata.gz: 5c3b3e6ee54f4e2bba948193c467882bbf4e47dd69ceac3bbe160cf204db2a9a
4
+ data.tar.gz: 55978252b10819ed6cfc5508227fecfa57ba4f761af83c4f4ec54cf12832451c
5
5
  SHA512:
6
- metadata.gz: bd36ba2e48c2ad07491faa766d9dc4ffc71db8f15f903044feb13bcb137906d7322ab0c10da91c041c0c73d54508a2f80b4942c4df06d8f93889733106560bcd
7
- data.tar.gz: 34cf9fa2aa0549dccc6337151299fb93bdff1ea6069972af69abca72ac2d1002209a967405fddb85ea30ed8dcf852baefed22e83ecfc698c0882e3ec1e5b935b
6
+ metadata.gz: b64bf3e6385c2f25add2efbfd16be36e7eaaa8f4c4d2078ad3022f9982d6574ac7e6387272c234033980524e8ff6b103ea8db9cf5280b6188105b9f69e5140c3
7
+ data.tar.gz: 7d0c1743806c0aa83f873773dae32393d63a921b7a90a0beeeab241bb3ec60a7b5350d154b3cebe8e67af125b219b4519bd6fdefacd4b7fa4aba368d90dbd102
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Ruby Client for the Cloud Speech-to-Text V1 API
2
2
 
3
- API Client library for the Cloud Speech-to-Text V1 API
3
+ Converts audio to text by applying powerful neural network models.
4
4
 
5
5
  Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.
6
6
 
@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
39
39
  optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
40
40
  optional :single_utterance, :bool, 2
41
41
  optional :interim_results, :bool, 3
42
+ optional :enable_voice_activity_events, :bool, 5
43
+ optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout"
44
+ end
45
+ add_message "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout" do
46
+ optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
47
+ optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
42
48
  end
43
49
  add_message "google.cloud.speech.v1.RecognitionConfig" do
44
50
  optional :encoding, :enum, 1, "google.cloud.speech.v1.RecognitionConfig.AudioEncoding"
@@ -153,6 +159,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
153
159
  optional :error, :message, 1, "google.rpc.Status"
154
160
  repeated :results, :message, 2, "google.cloud.speech.v1.StreamingRecognitionResult"
155
161
  optional :speech_event_type, :enum, 4, "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType"
162
+ optional :speech_event_time, :message, 8, "google.protobuf.Duration"
156
163
  optional :total_billed_time, :message, 5, "google.protobuf.Duration"
157
164
  optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1.SpeechAdaptationInfo"
158
165
  optional :request_id, :int64, 10
@@ -160,6 +167,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
160
167
  add_enum "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType" do
161
168
  value :SPEECH_EVENT_UNSPECIFIED, 0
162
169
  value :END_OF_SINGLE_UTTERANCE, 1
170
+ value :SPEECH_ACTIVITY_BEGIN, 2
171
+ value :SPEECH_ACTIVITY_END, 3
172
+ value :SPEECH_ACTIVITY_TIMEOUT, 4
163
173
  end
164
174
  add_message "google.cloud.speech.v1.StreamingRecognitionResult" do
165
175
  repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
@@ -203,6 +213,7 @@ module Google
203
213
  TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.TranscriptOutputConfig").msgclass
204
214
  StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeRequest").msgclass
205
215
  StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
216
+ StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
206
217
  RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
207
218
  RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
208
219
  SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeakerDiarizationConfig").msgclass
@@ -21,7 +21,7 @@ module Google
21
21
  module Cloud
22
22
  module Speech
23
23
  module V1
24
- VERSION = "0.11.0"
24
+ VERSION = "0.12.0"
25
25
  end
26
26
  end
27
27
  end
@@ -122,9 +122,30 @@ module Google
122
122
  # returned as they become available (these interim results are indicated with
123
123
  # the `is_final=false` flag).
124
124
  # If `false` or omitted, only `is_final=true` result(s) are returned.
125
+ # @!attribute [rw] enable_voice_activity_events
126
+ # @return [::Boolean]
127
+ # If `true`, responses with voice activity speech events will be returned as
128
+ # they are detected.
129
+ # @!attribute [rw] voice_activity_timeout
130
+ # @return [::Google::Cloud::Speech::V1::StreamingRecognitionConfig::VoiceActivityTimeout]
131
+ # If set, the server will automatically close the stream after the specified
132
+ # duration has elapsed after the last VOICE_ACTIVITY speech event has been
133
+ # sent. The field `enable_voice_activity_events` must also be set to true.
125
134
  class StreamingRecognitionConfig
126
135
  include ::Google::Protobuf::MessageExts
127
136
  extend ::Google::Protobuf::MessageExts::ClassMethods
137
+
138
+ # Events that a timeout can be set on for voice activity.
139
+ # @!attribute [rw] speech_start_timeout
140
+ # @return [::Google::Protobuf::Duration]
141
+ # Duration to timeout the stream if no speech begins.
142
+ # @!attribute [rw] speech_end_timeout
143
+ # @return [::Google::Protobuf::Duration]
144
+ # Duration to timeout the stream after speech ends.
145
+ class VoiceActivityTimeout
146
+ include ::Google::Protobuf::MessageExts
147
+ extend ::Google::Protobuf::MessageExts::ClassMethods
148
+ end
128
149
  end
129
150
 
130
151
  # Provides information to the recognizer that specifies how to process the
@@ -133,7 +154,8 @@ module Google
133
154
  # @return [::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding]
134
155
  # Encoding of audio data sent in all `RecognitionAudio` messages.
135
156
  # This field is optional for `FLAC` and `WAV` audio files and required
136
- # for all other audio formats. For details, see {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
157
+ # for all other audio formats. For details, see
158
+ # {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
137
159
  # @!attribute [rw] sample_rate_hertz
138
160
  # @return [::Integer]
139
161
  # Sample rate in Hertz of the audio data sent in all
@@ -142,7 +164,8 @@ module Google
142
164
  # source to 16000 Hz. If that's not possible, use the native sample rate of
143
165
  # the audio source (instead of re-sampling).
144
166
  # This field is optional for FLAC and WAV audio files, but is
145
- # required for all other audio formats. For details, see {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
167
+ # required for all other audio formats. For details, see
168
+ # {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
146
169
  # @!attribute [rw] audio_channel_count
147
170
  # @return [::Integer]
148
171
  # The number of channels in the input audio data.
@@ -346,7 +369,8 @@ module Google
346
369
  # an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
347
370
  # encoding configuration must match the encoding described in the audio
348
371
  # header; otherwise the request returns an
349
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
372
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
373
+ # code.
350
374
  module AudioEncoding
351
375
  # Not specified.
352
376
  ENCODING_UNSPECIFIED = 0
@@ -585,8 +609,8 @@ module Google
585
609
 
586
610
  # Contains audio data in the encoding specified in the `RecognitionConfig`.
587
611
  # Either `content` or `uri` must be supplied. Supplying both or neither
588
- # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
589
- # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
612
+ # returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
613
+ # See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
590
614
  # @!attribute [rw] content
591
615
  # @return [::String]
592
616
  # The audio data bytes encoded as specified in
@@ -599,8 +623,9 @@ module Google
599
623
  # Currently, only Google Cloud Storage URIs are
600
624
  # supported, which must be specified in the following format:
601
625
  # `gs://bucket_name/object_name` (other URI formats return
602
- # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
603
- # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
626
+ # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
627
+ # For more information, see [Request
628
+ # URIs](https://cloud.google.com/storage/docs/reference-uris).
604
629
  class RecognitionAudio
605
630
  include ::Google::Protobuf::MessageExts
606
631
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -673,8 +698,8 @@ module Google
673
698
  # Time of the most recent processing update.
674
699
  # @!attribute [r] uri
675
700
  # @return [::String]
676
- # Output only. The URI of the audio file being transcribed. Empty if the audio was sent
677
- # as byte content.
701
+ # Output only. The URI of the audio file being transcribed. Empty if the
702
+ # audio was sent as byte content.
678
703
  class LongRunningRecognizeMetadata
679
704
  include ::Google::Protobuf::MessageExts
680
705
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -742,6 +767,9 @@ module Google
742
767
  # @!attribute [rw] speech_event_type
743
768
  # @return [::Google::Cloud::Speech::V1::StreamingRecognizeResponse::SpeechEventType]
744
769
  # Indicates the type of speech event.
770
+ # @!attribute [rw] speech_event_time
771
+ # @return [::Google::Protobuf::Duration]
772
+ # Time offset between the beginning of the audio and event emission.
745
773
  # @!attribute [rw] total_billed_time
746
774
  # @return [::Google::Protobuf::Duration]
747
775
  # When available, billed audio seconds for the stream.
@@ -770,6 +798,23 @@ module Google
770
798
  # until the server closes the gRPC connection. This event is only sent if
771
799
  # `single_utterance` was set to `true`, and is not used otherwise.
772
800
  END_OF_SINGLE_UTTERANCE = 1
801
+
802
+ # This event indicates that the server has detected the beginning of human
803
+ # voice activity in the stream. This event can be returned multiple times
804
+ # if speech starts and stops repeatedly throughout the stream. This event
805
+ # is only sent if `enable_voice_activity_events` is set to true.
806
+ SPEECH_ACTIVITY_BEGIN = 2
807
+
808
+ # This event indicates that the server has detected the end of human voice
809
+ # activity in the stream. This event can be returned multiple times if
810
+ # speech starts and stops repeatedly throughout the stream. This event is
811
+ # only sent if `enable_voice_activity_events` is set to true.
812
+ SPEECH_ACTIVITY_END = 3
813
+
814
+ # This event indicates that the user-set timeout for speech activity begin
815
+ # or end has been exceeded. Upon receiving this event, the client is expected to
816
+ # send a half close. Further audio will not be processed.
817
+ SPEECH_ACTIVITY_TIMEOUT = 4
773
818
  end
774
819
  end
775
820
 
@@ -806,9 +851,9 @@ module Google
806
851
  # For audio_channel_count = N, its output values can range from '1' to 'N'.
807
852
  # @!attribute [r] language_code
808
853
  # @return [::String]
809
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
810
- # of the language in this result. This language code was detected to have
811
- # the most likelihood of being spoken in the audio.
854
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
855
+ # language tag of the language in this result. This language code was
856
+ # detected to have the most likelihood of being spoken in the audio.
812
857
  class StreamingRecognitionResult
813
858
  include ::Google::Protobuf::MessageExts
814
859
  extend ::Google::Protobuf::MessageExts::ClassMethods
@@ -832,9 +877,9 @@ module Google
832
877
  # beginning of the audio.
833
878
  # @!attribute [r] language_code
834
879
  # @return [::String]
835
- # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
836
- # of the language in this result. This language code was detected to have
837
- # the most likelihood of being spoken in the audio.
880
+ # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
881
+ # language tag of the language in this result. This language code was
882
+ # detected to have the most likelihood of being spoken in the audio.
838
883
  class SpeechRecognitionResult
839
884
  include ::Google::Protobuf::MessageExts
840
885
  extend ::Google::Protobuf::MessageExts::ClassMethods
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-speech-v1
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Google LLC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-23 00:00:00.000000000 Z
11
+ date: 2023-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gapic-common
@@ -16,7 +16,7 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.17.1
19
+ version: 0.18.0
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
22
  version: 2.a
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- version: 0.17.1
29
+ version: 0.18.0
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: 2.a
@@ -236,5 +236,5 @@ requirements: []
236
236
  rubygems_version: 3.4.2
237
237
  signing_key:
238
238
  specification_version: 4
239
- summary: API Client library for the Cloud Speech-to-Text V1 API
239
+ summary: Converts audio to text by applying powerful neural network models.
240
240
  test_files: []