google-cloud-speech-v1 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c3b3e6ee54f4e2bba948193c467882bbf4e47dd69ceac3bbe160cf204db2a9a
|
4
|
+
data.tar.gz: 55978252b10819ed6cfc5508227fecfa57ba4f761af83c4f4ec54cf12832451c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b64bf3e6385c2f25add2efbfd16be36e7eaaa8f4c4d2078ad3022f9982d6574ac7e6387272c234033980524e8ff6b103ea8db9cf5280b6188105b9f69e5140c3
|
7
|
+
data.tar.gz: 7d0c1743806c0aa83f873773dae32393d63a921b7a90a0beeeab241bb3ec60a7b5350d154b3cebe8e67af125b219b4519bd6fdefacd4b7fa4aba368d90dbd102
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Ruby Client for the Cloud Speech-to-Text V1 API
|
2
2
|
|
3
|
-
|
3
|
+
Converts audio to text by applying powerful neural network models.
|
4
4
|
|
5
5
|
Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.
|
6
6
|
|
@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
39
39
|
optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
|
40
40
|
optional :single_utterance, :bool, 2
|
41
41
|
optional :interim_results, :bool, 3
|
42
|
+
optional :enable_voice_activity_events, :bool, 5
|
43
|
+
optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout"
|
44
|
+
end
|
45
|
+
add_message "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout" do
|
46
|
+
optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
|
47
|
+
optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
|
42
48
|
end
|
43
49
|
add_message "google.cloud.speech.v1.RecognitionConfig" do
|
44
50
|
optional :encoding, :enum, 1, "google.cloud.speech.v1.RecognitionConfig.AudioEncoding"
|
@@ -153,6 +159,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
153
159
|
optional :error, :message, 1, "google.rpc.Status"
|
154
160
|
repeated :results, :message, 2, "google.cloud.speech.v1.StreamingRecognitionResult"
|
155
161
|
optional :speech_event_type, :enum, 4, "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType"
|
162
|
+
optional :speech_event_time, :message, 8, "google.protobuf.Duration"
|
156
163
|
optional :total_billed_time, :message, 5, "google.protobuf.Duration"
|
157
164
|
optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1.SpeechAdaptationInfo"
|
158
165
|
optional :request_id, :int64, 10
|
@@ -160,6 +167,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
160
167
|
add_enum "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType" do
|
161
168
|
value :SPEECH_EVENT_UNSPECIFIED, 0
|
162
169
|
value :END_OF_SINGLE_UTTERANCE, 1
|
170
|
+
value :SPEECH_ACTIVITY_BEGIN, 2
|
171
|
+
value :SPEECH_ACTIVITY_END, 3
|
172
|
+
value :SPEECH_ACTIVITY_TIMEOUT, 4
|
163
173
|
end
|
164
174
|
add_message "google.cloud.speech.v1.StreamingRecognitionResult" do
|
165
175
|
repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
|
@@ -203,6 +213,7 @@ module Google
|
|
203
213
|
TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.TranscriptOutputConfig").msgclass
|
204
214
|
StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeRequest").msgclass
|
205
215
|
StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
|
216
|
+
StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
|
206
217
|
RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
|
207
218
|
RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
|
208
219
|
SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeakerDiarizationConfig").msgclass
|
@@ -122,9 +122,30 @@ module Google
|
|
122
122
|
# returned as they become available (these interim results are indicated with
|
123
123
|
# the `is_final=false` flag).
|
124
124
|
# If `false` or omitted, only `is_final=true` result(s) are returned.
|
125
|
+
# @!attribute [rw] enable_voice_activity_events
|
126
|
+
# @return [::Boolean]
|
127
|
+
# If `true`, responses with voice activity speech events will be returned as
|
128
|
+
# they are detected.
|
129
|
+
# @!attribute [rw] voice_activity_timeout
|
130
|
+
# @return [::Google::Cloud::Speech::V1::StreamingRecognitionConfig::VoiceActivityTimeout]
|
131
|
+
# If set, the server will automatically close the stream after the specified
|
132
|
+
# duration has elapsed after the last VOICE_ACTIVITY speech event has been
|
133
|
+
# sent. The field `enable_voice_activity_events` must also be set to true.
|
125
134
|
class StreamingRecognitionConfig
|
126
135
|
include ::Google::Protobuf::MessageExts
|
127
136
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
137
|
+
|
138
|
+
# Events that a timeout can be set on for voice activity.
|
139
|
+
# @!attribute [rw] speech_start_timeout
|
140
|
+
# @return [::Google::Protobuf::Duration]
|
141
|
+
# Duration to timeout the stream if no speech begins.
|
142
|
+
# @!attribute [rw] speech_end_timeout
|
143
|
+
# @return [::Google::Protobuf::Duration]
|
144
|
+
# Duration to timeout the stream after speech ends.
|
145
|
+
class VoiceActivityTimeout
|
146
|
+
include ::Google::Protobuf::MessageExts
|
147
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
148
|
+
end
|
128
149
|
end
|
129
150
|
|
130
151
|
# Provides information to the recognizer that specifies how to process the
|
@@ -133,7 +154,8 @@ module Google
|
|
133
154
|
# @return [::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding]
|
134
155
|
# Encoding of audio data sent in all `RecognitionAudio` messages.
|
135
156
|
# This field is optional for `FLAC` and `WAV` audio files and required
|
136
|
-
# for all other audio formats. For details, see
|
157
|
+
# for all other audio formats. For details, see
|
158
|
+
# {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
137
159
|
# @!attribute [rw] sample_rate_hertz
|
138
160
|
# @return [::Integer]
|
139
161
|
# Sample rate in Hertz of the audio data sent in all
|
@@ -142,7 +164,8 @@ module Google
|
|
142
164
|
# source to 16000 Hz. If that's not possible, use the native sample rate of
|
143
165
|
# the audio source (instead of re-sampling).
|
144
166
|
# This field is optional for FLAC and WAV audio files, but is
|
145
|
-
# required for all other audio formats. For details, see
|
167
|
+
# required for all other audio formats. For details, see
|
168
|
+
# {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
146
169
|
# @!attribute [rw] audio_channel_count
|
147
170
|
# @return [::Integer]
|
148
171
|
# The number of channels in the input audio data.
|
@@ -346,7 +369,8 @@ module Google
|
|
346
369
|
# an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
|
347
370
|
# encoding configuration must match the encoding described in the audio
|
348
371
|
# header; otherwise the request returns an
|
349
|
-
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
|
372
|
+
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
|
373
|
+
# code.
|
350
374
|
module AudioEncoding
|
351
375
|
# Not specified.
|
352
376
|
ENCODING_UNSPECIFIED = 0
|
@@ -585,8 +609,8 @@ module Google
|
|
585
609
|
|
586
610
|
# Contains audio data in the encoding specified in the `RecognitionConfig`.
|
587
611
|
# Either `content` or `uri` must be supplied. Supplying both or neither
|
588
|
-
# returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
589
|
-
# [content limits](https://cloud.google.com/speech-to-text/quotas#content).
|
612
|
+
# returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
613
|
+
# See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
|
590
614
|
# @!attribute [rw] content
|
591
615
|
# @return [::String]
|
592
616
|
# The audio data bytes encoded as specified in
|
@@ -599,8 +623,9 @@ module Google
|
|
599
623
|
# Currently, only Google Cloud Storage URIs are
|
600
624
|
# supported, which must be specified in the following format:
|
601
625
|
# `gs://bucket_name/object_name` (other URI formats return
|
602
|
-
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
|
603
|
-
# [Request
|
626
|
+
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
|
627
|
+
# For more information, see [Request
|
628
|
+
# URIs](https://cloud.google.com/storage/docs/reference-uris).
|
604
629
|
class RecognitionAudio
|
605
630
|
include ::Google::Protobuf::MessageExts
|
606
631
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -673,8 +698,8 @@ module Google
|
|
673
698
|
# Time of the most recent processing update.
|
674
699
|
# @!attribute [r] uri
|
675
700
|
# @return [::String]
|
676
|
-
# Output only. The URI of the audio file being transcribed. Empty if the
|
677
|
-
# as byte content.
|
701
|
+
# Output only. The URI of the audio file being transcribed. Empty if the
|
702
|
+
# audio was sent as byte content.
|
678
703
|
class LongRunningRecognizeMetadata
|
679
704
|
include ::Google::Protobuf::MessageExts
|
680
705
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -742,6 +767,9 @@ module Google
|
|
742
767
|
# @!attribute [rw] speech_event_type
|
743
768
|
# @return [::Google::Cloud::Speech::V1::StreamingRecognizeResponse::SpeechEventType]
|
744
769
|
# Indicates the type of speech event.
|
770
|
+
# @!attribute [rw] speech_event_time
|
771
|
+
# @return [::Google::Protobuf::Duration]
|
772
|
+
# Time offset between the beginning of the audio and event emission.
|
745
773
|
# @!attribute [rw] total_billed_time
|
746
774
|
# @return [::Google::Protobuf::Duration]
|
747
775
|
# When available, billed audio seconds for the stream.
|
@@ -770,6 +798,23 @@ module Google
|
|
770
798
|
# until the server closes the gRPC connection. This event is only sent if
|
771
799
|
# `single_utterance` was set to `true`, and is not used otherwise.
|
772
800
|
END_OF_SINGLE_UTTERANCE = 1
|
801
|
+
|
802
|
+
# This event indicates that the server has detected the beginning of human
|
803
|
+
# voice activity in the stream. This event can be returned multiple times
|
804
|
+
# if speech starts and stops repeatedly throughout the stream. This event
|
805
|
+
# is only sent if `enable_voice_activity_events` is set to true.
|
806
|
+
SPEECH_ACTIVITY_BEGIN = 2
|
807
|
+
|
808
|
+
# This event indicates that the server has detected the end of human voice
|
809
|
+
# activity in the stream. This event can be returned multiple times if
|
810
|
+
# speech starts and stops repeatedly throughout the stream. This event is
|
811
|
+
# only sent if `enable_voice_activity_events` is set to true.
|
812
|
+
SPEECH_ACTIVITY_END = 3
|
813
|
+
|
814
|
+
# This event indicates that the user-set timeout for speech activity begin
|
815
|
+
# or end has been exceeded. Upon receiving this event, the client is expected to
|
816
|
+
# send a half close. Further audio will not be processed.
|
817
|
+
SPEECH_ACTIVITY_TIMEOUT = 4
|
773
818
|
end
|
774
819
|
end
|
775
820
|
|
@@ -806,9 +851,9 @@ module Google
|
|
806
851
|
# For audio_channel_count = N, its output values can range from '1' to 'N'.
|
807
852
|
# @!attribute [r] language_code
|
808
853
|
# @return [::String]
|
809
|
-
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
810
|
-
# of the language in this result. This language code was
|
811
|
-
# the most likelihood of being spoken in the audio.
|
854
|
+
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
855
|
+
# language tag of the language in this result. This language code was
|
856
|
+
# detected to have the most likelihood of being spoken in the audio.
|
812
857
|
class StreamingRecognitionResult
|
813
858
|
include ::Google::Protobuf::MessageExts
|
814
859
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -832,9 +877,9 @@ module Google
|
|
832
877
|
# beginning of the audio.
|
833
878
|
# @!attribute [r] language_code
|
834
879
|
# @return [::String]
|
835
|
-
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
836
|
-
# of the language in this result. This language code was
|
837
|
-
# the most likelihood of being spoken in the audio.
|
880
|
+
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
881
|
+
# language tag of the language in this result. This language code was
|
882
|
+
# detected to have the most likelihood of being spoken in the audio.
|
838
883
|
class SpeechRecognitionResult
|
839
884
|
include ::Google::Protobuf::MessageExts
|
840
885
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-cloud-speech-v1
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 0.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Google LLC
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: gapic-common
|
@@ -16,7 +16,7 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.18.0
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 2.a
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.
|
29
|
+
version: 0.18.0
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 2.a
|
@@ -236,5 +236,5 @@ requirements: []
|
|
236
236
|
rubygems_version: 3.4.2
|
237
237
|
signing_key:
|
238
238
|
specification_version: 4
|
239
|
-
summary:
|
239
|
+
summary: Converts audio to text by applying powerful neural network models.
|
240
240
|
test_files: []
|