google-cloud-speech-v1p1beta1 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3fb51328890358e72c887d4c047bbe5380b02f67a57e19a315a824f35fedf151
|
|
4
|
+
data.tar.gz: 76ad18cf2a0031d1d491e791fd84f45cc67f84abb59361a3366a116dc640b4c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b59f26dd15aa099a0e4b048a90e44d2a7bd4d745a10c047939508923c0ef515172378efbc0b75fff35c9f74603ed8ee585a6b7e659e5a2c898260a622ee32385
|
|
7
|
+
data.tar.gz: 0420ff684d70303bf3f1fdfe38bb31ae944807c0441215d507b6e6a4fa027032a2d5b2c5a582cc51db7f659e9d87167ad53668b012d7ec55b3720a20767a8402
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Ruby Client for the Cloud Speech-to-Text V1p1beta1 API
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Converts audio to text by applying powerful neural network models.
|
|
4
4
|
|
|
5
5
|
Google Speech-to-Text enables developers to convert audio to text by applying powerful neural network models in an easy-to-use API. The API recognizes more than 120 languages and variants to support your global user base. You can enable voice command-and-control, transcribe audio from call centers, and more. It can process real-time streaming or prerecorded audio, using Google's machine learning technology.
|
|
6
6
|
|
|
@@ -39,6 +39,12 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
|
39
39
|
optional :config, :message, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig"
|
|
40
40
|
optional :single_utterance, :bool, 2
|
|
41
41
|
optional :interim_results, :bool, 3
|
|
42
|
+
optional :enable_voice_activity_events, :bool, 5
|
|
43
|
+
optional :voice_activity_timeout, :message, 6, "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout"
|
|
44
|
+
end
|
|
45
|
+
add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout" do
|
|
46
|
+
optional :speech_start_timeout, :message, 1, "google.protobuf.Duration"
|
|
47
|
+
optional :speech_end_timeout, :message, 2, "google.protobuf.Duration"
|
|
42
48
|
end
|
|
43
49
|
add_message "google.cloud.speech.v1p1beta1.RecognitionConfig" do
|
|
44
50
|
optional :encoding, :enum, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding"
|
|
@@ -159,6 +165,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
|
159
165
|
optional :error, :message, 1, "google.rpc.Status"
|
|
160
166
|
repeated :results, :message, 2, "google.cloud.speech.v1p1beta1.StreamingRecognitionResult"
|
|
161
167
|
optional :speech_event_type, :enum, 4, "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType"
|
|
168
|
+
optional :speech_event_time, :message, 8, "google.protobuf.Duration"
|
|
162
169
|
optional :total_billed_time, :message, 5, "google.protobuf.Duration"
|
|
163
170
|
optional :speech_adaptation_info, :message, 9, "google.cloud.speech.v1p1beta1.SpeechAdaptationInfo"
|
|
164
171
|
optional :request_id, :int64, 10
|
|
@@ -166,6 +173,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
|
166
173
|
add_enum "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType" do
|
|
167
174
|
value :SPEECH_EVENT_UNSPECIFIED, 0
|
|
168
175
|
value :END_OF_SINGLE_UTTERANCE, 1
|
|
176
|
+
value :SPEECH_ACTIVITY_BEGIN, 2
|
|
177
|
+
value :SPEECH_ACTIVITY_END, 3
|
|
178
|
+
value :SPEECH_ACTIVITY_TIMEOUT, 4
|
|
169
179
|
end
|
|
170
180
|
add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionResult" do
|
|
171
181
|
repeated :alternatives, :message, 1, "google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative"
|
|
@@ -209,6 +219,7 @@ module Google
|
|
|
209
219
|
TranscriptOutputConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.TranscriptOutputConfig").msgclass
|
|
210
220
|
StreamingRecognizeRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognizeRequest").msgclass
|
|
211
221
|
StreamingRecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionConfig").msgclass
|
|
222
|
+
StreamingRecognitionConfig::VoiceActivityTimeout = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionConfig.VoiceActivityTimeout").msgclass
|
|
212
223
|
RecognitionConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig").msgclass
|
|
213
224
|
RecognitionConfig::AudioEncoding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding").enummodule
|
|
214
225
|
SpeakerDiarizationConfig = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.SpeakerDiarizationConfig").msgclass
|
|
@@ -122,9 +122,30 @@ module Google
|
|
|
122
122
|
# returned as they become available (these interim results are indicated with
|
|
123
123
|
# the `is_final=false` flag).
|
|
124
124
|
# If `false` or omitted, only `is_final=true` result(s) are returned.
|
|
125
|
+
# @!attribute [rw] enable_voice_activity_events
|
|
126
|
+
# @return [::Boolean]
|
|
127
|
+
# If `true`, responses with voice activity speech events will be returned as
|
|
128
|
+
# they are detected.
|
|
129
|
+
# @!attribute [rw] voice_activity_timeout
|
|
130
|
+
# @return [::Google::Cloud::Speech::V1p1beta1::StreamingRecognitionConfig::VoiceActivityTimeout]
|
|
131
|
+
# If set, the server will automatically close the stream after the specified
|
|
132
|
+
# duration has elapsed after the last VOICE_ACTIVITY speech event has been
|
|
133
|
+
# sent. The field `voice_activity_events` must also be set to true.
|
|
125
134
|
class StreamingRecognitionConfig
|
|
126
135
|
include ::Google::Protobuf::MessageExts
|
|
127
136
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
|
137
|
+
|
|
138
|
+
# Events that a timeout can be set on for voice activity.
|
|
139
|
+
# @!attribute [rw] speech_start_timeout
|
|
140
|
+
# @return [::Google::Protobuf::Duration]
|
|
141
|
+
# Duration to timeout the stream if no speech begins.
|
|
142
|
+
# @!attribute [rw] speech_end_timeout
|
|
143
|
+
# @return [::Google::Protobuf::Duration]
|
|
144
|
+
# Duration to timeout the stream after speech ends.
|
|
145
|
+
class VoiceActivityTimeout
|
|
146
|
+
include ::Google::Protobuf::MessageExts
|
|
147
|
+
extend ::Google::Protobuf::MessageExts::ClassMethods
|
|
148
|
+
end
|
|
128
149
|
end
|
|
129
150
|
|
|
130
151
|
# Provides information to the recognizer that specifies how to process the
|
|
@@ -133,7 +154,8 @@ module Google
|
|
|
133
154
|
# @return [::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding]
|
|
134
155
|
# Encoding of audio data sent in all `RecognitionAudio` messages.
|
|
135
156
|
# This field is optional for `FLAC` and `WAV` audio files and required
|
|
136
|
-
# for all other audio formats. For details, see
|
|
157
|
+
# for all other audio formats. For details, see
|
|
158
|
+
# {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
|
137
159
|
# @!attribute [rw] sample_rate_hertz
|
|
138
160
|
# @return [::Integer]
|
|
139
161
|
# Sample rate in Hertz of the audio data sent in all
|
|
@@ -142,7 +164,8 @@ module Google
|
|
|
142
164
|
# source to 16000 Hz. If that's not possible, use the native sample rate of
|
|
143
165
|
# the audio source (instead of re-sampling).
|
|
144
166
|
# This field is optional for FLAC and WAV audio files, but is
|
|
145
|
-
# required for all other audio formats. For details, see
|
|
167
|
+
# required for all other audio formats. For details, see
|
|
168
|
+
# {::Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
|
146
169
|
# @!attribute [rw] audio_channel_count
|
|
147
170
|
# @return [::Integer]
|
|
148
171
|
# The number of channels in the input audio data.
|
|
@@ -363,7 +386,8 @@ module Google
|
|
|
363
386
|
# an `AudioEncoding` when you send send `FLAC` or `WAV` audio, the
|
|
364
387
|
# encoding configuration must match the encoding described in the audio
|
|
365
388
|
# header; otherwise the request returns an
|
|
366
|
-
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
|
|
389
|
+
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
|
|
390
|
+
# code.
|
|
367
391
|
module AudioEncoding
|
|
368
392
|
# Not specified.
|
|
369
393
|
ENCODING_UNSPECIFIED = 0
|
|
@@ -612,8 +636,8 @@ module Google
|
|
|
612
636
|
|
|
613
637
|
# Contains audio data in the encoding specified in the `RecognitionConfig`.
|
|
614
638
|
# Either `content` or `uri` must be supplied. Supplying both or neither
|
|
615
|
-
# returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
|
616
|
-
# [content limits](https://cloud.google.com/speech-to-text/quotas#content).
|
|
639
|
+
# returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
|
640
|
+
# See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
|
|
617
641
|
# @!attribute [rw] content
|
|
618
642
|
# @return [::String]
|
|
619
643
|
# The audio data bytes encoded as specified in
|
|
@@ -626,8 +650,9 @@ module Google
|
|
|
626
650
|
# Currently, only Google Cloud Storage URIs are
|
|
627
651
|
# supported, which must be specified in the following format:
|
|
628
652
|
# `gs://bucket_name/object_name` (other URI formats return
|
|
629
|
-
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
|
|
630
|
-
# [Request
|
|
653
|
+
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
|
|
654
|
+
# For more information, see [Request
|
|
655
|
+
# URIs](https://cloud.google.com/storage/docs/reference-uris).
|
|
631
656
|
class RecognitionAudio
|
|
632
657
|
include ::Google::Protobuf::MessageExts
|
|
633
658
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
|
@@ -700,11 +725,12 @@ module Google
|
|
|
700
725
|
# Time of the most recent processing update.
|
|
701
726
|
# @!attribute [r] uri
|
|
702
727
|
# @return [::String]
|
|
703
|
-
# Output only. The URI of the audio file being transcribed. Empty if the
|
|
704
|
-
# as byte content.
|
|
728
|
+
# Output only. The URI of the audio file being transcribed. Empty if the
|
|
729
|
+
# audio was sent as byte content.
|
|
705
730
|
# @!attribute [r] output_config
|
|
706
731
|
# @return [::Google::Cloud::Speech::V1p1beta1::TranscriptOutputConfig]
|
|
707
|
-
# Output only. A copy of the TranscriptOutputConfig if it was set in the
|
|
732
|
+
# Output only. A copy of the TranscriptOutputConfig if it was set in the
|
|
733
|
+
# request.
|
|
708
734
|
class LongRunningRecognizeMetadata
|
|
709
735
|
include ::Google::Protobuf::MessageExts
|
|
710
736
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
|
@@ -772,6 +798,9 @@ module Google
|
|
|
772
798
|
# @!attribute [rw] speech_event_type
|
|
773
799
|
# @return [::Google::Cloud::Speech::V1p1beta1::StreamingRecognizeResponse::SpeechEventType]
|
|
774
800
|
# Indicates the type of speech event.
|
|
801
|
+
# @!attribute [rw] speech_event_time
|
|
802
|
+
# @return [::Google::Protobuf::Duration]
|
|
803
|
+
# Time offset between the beginning of the audio and event emission.
|
|
775
804
|
# @!attribute [rw] total_billed_time
|
|
776
805
|
# @return [::Google::Protobuf::Duration]
|
|
777
806
|
# When available, billed audio seconds for the stream.
|
|
@@ -800,6 +829,23 @@ module Google
|
|
|
800
829
|
# until the server closes the gRPC connection. This event is only sent if
|
|
801
830
|
# `single_utterance` was set to `true`, and is not used otherwise.
|
|
802
831
|
END_OF_SINGLE_UTTERANCE = 1
|
|
832
|
+
|
|
833
|
+
# This event indicates that the server has detected the beginning of human
|
|
834
|
+
# voice activity in the stream. This event can be returned multiple times
|
|
835
|
+
# if speech starts and stops repeatedly throughout the stream. This event
|
|
836
|
+
# is only sent if `voice_activity_events` is set to true.
|
|
837
|
+
SPEECH_ACTIVITY_BEGIN = 2
|
|
838
|
+
|
|
839
|
+
# This event indicates that the server has detected the end of human voice
|
|
840
|
+
# activity in the stream. This event can be returned multiple times if
|
|
841
|
+
# speech starts and stops repeatedly throughout the stream. This event is
|
|
842
|
+
# only sent if `voice_activity_events` is set to true.
|
|
843
|
+
SPEECH_ACTIVITY_END = 3
|
|
844
|
+
|
|
845
|
+
# This event indicates that the user-set timeout for speech activity begin
|
|
846
|
+
# or end has exceeded. Upon receiving this event, the client is expected to
|
|
847
|
+
# send a half close. Further audio will not be processed.
|
|
848
|
+
SPEECH_ACTIVITY_TIMEOUT = 4
|
|
803
849
|
end
|
|
804
850
|
end
|
|
805
851
|
|
|
@@ -836,9 +882,9 @@ module Google
|
|
|
836
882
|
# For audio_channel_count = N, its output values can range from '1' to 'N'.
|
|
837
883
|
# @!attribute [r] language_code
|
|
838
884
|
# @return [::String]
|
|
839
|
-
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
|
840
|
-
# of the language in this result. This language code was
|
|
841
|
-
# the most likelihood of being spoken in the audio.
|
|
885
|
+
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
|
886
|
+
# language tag of the language in this result. This language code was
|
|
887
|
+
# detected to have the most likelihood of being spoken in the audio.
|
|
842
888
|
class StreamingRecognitionResult
|
|
843
889
|
include ::Google::Protobuf::MessageExts
|
|
844
890
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
|
@@ -862,9 +908,9 @@ module Google
|
|
|
862
908
|
# beginning of the audio.
|
|
863
909
|
# @!attribute [r] language_code
|
|
864
910
|
# @return [::String]
|
|
865
|
-
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
|
866
|
-
# of the language in this result. This language code was
|
|
867
|
-
# the most likelihood of being spoken in the audio.
|
|
911
|
+
# Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
|
912
|
+
# language tag of the language in this result. This language code was
|
|
913
|
+
# detected to have the most likelihood of being spoken in the audio.
|
|
868
914
|
class SpeechRecognitionResult
|
|
869
915
|
include ::Google::Protobuf::MessageExts
|
|
870
916
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: google-cloud-speech-v1p1beta1
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.16.0
|
|
4
|
+
version: 0.17.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Google LLC
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-02-
|
|
11
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: gapic-common
|
|
@@ -16,7 +16,7 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.
|
|
19
|
+
version: 0.18.0
|
|
20
20
|
- - "<"
|
|
21
21
|
- !ruby/object:Gem::Version
|
|
22
22
|
version: 2.a
|
|
@@ -26,7 +26,7 @@ dependencies:
|
|
|
26
26
|
requirements:
|
|
27
27
|
- - ">="
|
|
28
28
|
- !ruby/object:Gem::Version
|
|
29
|
-
version: 0.
|
|
29
|
+
version: 0.18.0
|
|
30
30
|
- - "<"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
32
|
version: 2.a
|
|
@@ -236,5 +236,5 @@ requirements: []
|
|
|
236
236
|
rubygems_version: 3.4.2
|
|
237
237
|
signing_key:
|
|
238
238
|
specification_version: 4
|
|
239
|
-
summary:
|
|
239
|
+
summary: Converts audio to text by applying powerful neural network models.
|
|
240
240
|
test_files: []
|