google-cloud-speech-v1 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c3b3e6ee54f4e2bba948193c467882bbf4e47dd69ceac3bbe160cf204db2a9a
|
4
|
+
data.tar.gz: 55978252b10819ed6cfc5508227fecfa57ba4f761af83c4f4ec54cf12832451c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b64bf3e6385c2f25add2efbfd16be36e7eaaa8f4c4d2078ad3022f9982d6574ac7e6387272c234033980524e8ff6b103ea8db9cf5280b6188105b9f69e5140c3
|
7
|
+
data.tar.gz: 7d0c1743806c0aa83f873773dae32393d63a921b7a90a0beeeab241bb3ec60a7b5350d154b3cebe8e67af125b219b4519bd6fdefacd4b7fa4aba368d90dbd102
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Ruby Client for the Cloud Speech-to-Text V1 API
|
2
2
|
|
3
|
-
|
3
|
+
Converts audio to text by applying powerful neural network models.
|
4
4
|
|
5
5
|
Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.
|
6
6
|
|
@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
39
39
|
optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
|
40
40
|
optional :single_utterance, :bool, 2
|
41
41
|
optional :interim_results, :bool, 3
|
42
|
+
optional :enable_voice_activity_events, :bool, 5
|
43
|
+
optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout"
|
44
|
+
end
|
45
|
+
add_message "google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout" do
|
46
|
+
optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
|
47
|
+
optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
|
42
48
|
end
|
43
49
|
add_message "google.cloud.speech.v1.RecognitionConfig" do
|
44
50
|
optional :encoding, :enum, 1, "google.cloud.speech.v1.RecognitionConfig.AudioEncoding"
|
@@ -153,6 +159,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
153
159
|
optional :error, :message, 1, "google.rpc.Status"
|
154
160
|
repeated :results, :message, 2, "google.cloud.speech.v1.StreamingRecognitionResult"
|
155
161
|
optional :speech_event_type, :enum, 4, "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType"
|
162
|
+
optional :speech_event_time, :message, 8, "google.protobuf.Duration"
|
156
163
|
optional :total_billed_time, :message, 5, "google.protobuf.Duration"
|
157
164
|
optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1.SpeechAdaptationInfo"
|
158
165
|
optional :request_id, :int64, 10
|
@@ -160,6 +167,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
160
167
|
add_enum "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType" do
|
161
168
|
value :SPEECH_EVENT_UNSPECIFIED, 0
|
162
169
|
value :END_OF_SINGLE_UTTERANCE, 1
|
170
|
+
value :SPEECH_ACTIVITY_BEGIN, 2
|
171
|
+
value :SPEECH_ACTIVITY_END, 3
|
172
|
+
value :SPEECH_ACTIVITY_TIMEOUT, 4
|
163
173
|
end
|
164
174
|
add_message "google.cloud.speech.v1.StreamingRecognitionResult" do
|
165
175
|
repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
|
@@ -203,6 +213,7 @@ module Google
|
|
203
213
|
TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.TranscriptOutputConfig").msgclass
|
204
214
|
StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeRequest").msgclass
|
205
215
|
StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
|
216
|
+
StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
|
206
217
|
RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
|
207
218
|
RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
|
208
219
|
SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeakerDiarizationConfig").msgclass
|
@@ -122,9 +122,30 @@ module Google
|
|
122
122
|
# returned as they become available (these interim results are indicated with
|
123
123
|
# the `is_final=false` flag).
|
124
124
|
# If `false` or omitted, only `is_final=true` result(s) are returned.
|
125
|
+
# @!attribute [rw] enable_voice_activity_events
|
126
|
+
# @return [::Boolean]
|
127
|
+
# If `true`, responses with voice activity speech events will be returned as
|
128
|
+
# they are detected.
|
129
|
+
# @!attribute [rw] voice_activity_timeout
|
130
|
+
# @return [::Google::Cloud::Speech::V1::StreamingRecognitionConfig::VoiceActivityTimeout]
|
131
|
+
# If set, the server will automatically close the stream after the specified
|
132
|
+
# duration has elapsed after the last VOICE_ACTIVITY speech event has been
|
133
|
+
# sent. The field `enable_voice_activity_events` must also be set to true.
|
125
134
|
class StreamingRecognitionConfig
|
126
135
|
include ::Google::Protobuf::MessageExts
|
127
136
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
137
|
+
|
138
|
+
# Events that a timeout can be set on for voice activity.
|
139
|
+
# @!attribute [rw] speech_start_timeout
|
140
|
+
# @return [::Google::Protobuf::Duration]
|
141
|
+
# Duration to timeout the stream if no speech begins.
|
142
|
+
# @!attribute [rw] speech_end_timeout
|
143
|
+
# @return [::Google::Protobuf::Duration]
|
144
|
+
# Duration to timeout the stream after speech ends.
|
145
|
+
class VoiceActivityTimeout
|
146
|
+
include ::Google::Protobuf::MessageExts
|
147
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
148
|
+
end
|
128
149
|
end
|
129
150
|
|
130
151
|
# Provides information to the recognizer that specifies how to process the
|
@@ -133,7 +154,8 @@ module Google
|
|
133
154
|
# @return [::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding]
|
134
155
|
# Encoding of audio data sent in all `RecognitionAudio` messages.
|
135
156
|
# This field is optional for `FLAC` and `WAV` audio files and required
|
136
|
-
# for all other audio formats. For details, see
|
157
|
+
# for all other audio formats. For details, see
|
158
|
+
# {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
137
159
|
# @!attribute [rw] sample_rate_hertz
|
138
160
|
# @return [::Integer]
|
139
161
|
# Sample rate in Hertz of the audio data sent in all
|
@@ -142,7 +164,8 @@ module Google
|
|
142
164
|
# source to 16000 Hz. If that's not possible, use the native sample rate of
|
143
165
|
# the audio source (instead of re-sampling).
|
144
166
|
# This field is optional for FLAC and WAV audio files, but is
|
145
|
-
# required for all other audio formats. For details, see
|
167
|
+
# required for all other audio formats. For details, see
|
168
|
+
# {::Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
146
169
|
# @!attribute [rw] audio_channel_count
|
147
170
|
# @return [::Integer]
|
148
171
|
# The number of channels in the input audio data.
|
@@ -346,7 +369,8 @@ module Google
|
|
346
369
|
# an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
|
347
370
|
# encoding configuration must match the encoding described in the audio
|
348
371
|
# header; otherwise the request returns an
|
349
|
-
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
|
372
|
+
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
|
373
|
+
# code.
|
350
374
|
module AudioEncoding
|
351
375
|
# Not specified.
|
352
376
|
ENCODING_UNSPECIFIED = 0
|
@@ -585,8 +609,8 @@ module Google
|
|
585
609
|
|
586
610
|
# Contains audio data in the encoding specified in the `RecognitionConfig`.
|
587
611
|
# Either `content` or `uri` must be supplied. Supplying both or neither
|
588
|
-
# returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
589
|
-
# [content limits](https://cloud.google.com/speech-to-text/quotas#content).
|
612
|
+
# returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
613
|
+
# See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
|
590
614
|
# @!attribute [rw] content
|
591
615
|
# @return [::String]
|
592
616
|
# The audio data bytes encoded as specified in
|
@@ -599,8 +623,9 @@ module Google
|
|
599
623
|
# Currently, only Google Cloud Storage URIs are
|
600
624
|
# supported, which must be specified in the following format:
|
601
625
|
# `gs://bucket_name/object_name` (other URI formats return
|
602
|
-
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
|
603
|
-
# [Request
|
626
|
+
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
|
627
|
+
# For more information, see [Request
|
628
|
+
# URIs](https://cloud.google.com/storage/docs/reference-uris).
|
604
629
|
class RecognitionAudio
|
605
630
|
include ::Google::Protobuf::MessageExts
|
606
631
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -673,8 +698,8 @@ module Google
|
|
673
698
|
# Time of the most recent processing update.
|
674
699
|
# @!attribute [r] uri
|
675
700
|
# @return [::String]
|
676
|
-
# Output only. The URI of the audio file being transcribed. Empty if the
|
677
|
-
# as byte content.
|
701
|
+
# Output only. The URI of the audio file being transcribed. Empty if the
|
702
|
+
# audio was sent as byte content.
|
678
703
|
class LongRunningRecognizeMetadata
|
679
704
|
include ::Google::Protobuf::MessageExts
|
680
705
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -742,6 +767,9 @@ module Google
|
|
742
767
|
# @!attribute [rw] speech_event_type
|
743
768
|
# @return [::Google::Cloud::Speech::V1::StreamingRecognizeResponse::SpeechEventType]
|
744
769
|
# Indicates the type of speech event.
|
770
|
+
# @!attribute [rw] speech_event_time
|
771
|
+
# @return [::Google::Protobuf::Duration]
|
772
|
+
# Time offset between the beginning of the audio and event emission.
|
745
773
|
# @!attribute [rw] total_billed_time
|
746
774
|
# @return [::Google::Protobuf::Duration]
|
747
775
|
# When available, billed audio seconds for the stream.
|
@@ -770,6 +798,23 @@ module Google
|
|
770
798
|
# until the server closes the gRPC connection. This event is only sent if
|
771
799
|
# `single_utterance` was set to `true`, and is not used otherwise.
|
772
800
|
END_OF_SINGLE_UTTERANCE = 1
|
801
|
+
|
802
|
+
# This event indicates that the server has detected the beginning of human
|
803
|
+
# voice activity in the stream. This event can be returned multiple times
|
804
|
+
# if speech starts and stops repeatedly throughout the stream. This event
|
805
|
+
# is only sent if `enable_voice_activity_events` is set to true.
|
806
|
+
SPEECH_ACTIVITY_BEGIN = 2
|
807
|
+
|
808
|
+
# This event indicates that the server has detected the end of human voice
|
809
|
+
# activity in the stream. This event can be returned multiple times if
|
810
|
+
# speech starts and stops repeatedly throughout the stream. This event is
|
811
|
+
# only sent if `enable_voice_activity_events` is set to true.
|
812
|
+
SPEECH_ACTIVITY_END = 3
|
813
|
+
|
814
|
+
# This event indicates that the user-set timeout for speech activity begin
|
815
|
+
# or end has been exceeded. Upon receiving this event, the client is expected to
|
816
|
+
# send a half close. Further audio will not be processed.
|
817
|
+
SPEECH_ACTIVITY_TIMEOUT = 4
|
773
818
|
end
|
774
819
|
end
|
775
820
|
|
@@ -806,9 +851,9 @@ module Google
|
|
806
851
|
# For audio_channel_count = N, its output values can range from '1' to 'N'.
|
807
852
|
# @!attribute [r] language_code
|
808
853
|
# @return [::String]
|
809
|
-
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
810
|
-
# of the language in this result. This language code was
|
811
|
-
# the most likelihood of being spoken in the audio.
|
854
|
+
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
855
|
+
# language tag of the language in this result. This language code was
|
856
|
+
# detected to have the most likelihood of being spoken in the audio.
|
812
857
|
class StreamingRecognitionResult
|
813
858
|
include ::Google::Protobuf::MessageExts
|
814
859
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -832,9 +877,9 @@ module Google
|
|
832
877
|
# beginning of the audio.
|
833
878
|
# @!attribute [r] language_code
|
834
879
|
# @return [::String]
|
835
|
-
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
836
|
-
# of the language in this result. This language code was
|
837
|
-
# the most likelihood of being spoken in the audio.
|
880
|
+
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
881
|
+
# language tag of the language in this result. This language code was
|
882
|
+
# detected to have the most likelihood of being spoken in the audio.
|
838
883
|
class SpeechRecognitionResult
|
839
884
|
include ::Google::Protobuf::MessageExts
|
840
885
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-cloud-speech-v1
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 0.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Google LLC
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: gapic-common
|
@@ -16,7 +16,7 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.18.0
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 2.a
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.
|
29
|
+
version: 0.18.0
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 2.a
|
@@ -236,5 +236,5 @@ requirements: []
|
|
236
236
|
rubygems_version: 3.4.2
|
237
237
|
signing_key:
|
238
238
|
specification_version: 4
|
239
|
-
summary:
|
239
|
+
summary: Converts audio to text by applying powerful neural network models.
|
240
240
|
test_files: []
|