google-cloud-speech 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,4 @@
1
- # Copyright 2016 Google Inc. All rights reserved.
2
- #
1
+ # Copyright 2017, Google Inc. All rights reserved.
3
2
  # Licensed under the Apache License, Version 2.0 (the "License");
4
3
  # you may not use this file except in compliance with the License.
5
4
  # You may obtain a copy of the License at
@@ -12,4 +11,4 @@
12
11
  # See the License for the specific language governing permissions and
13
12
  # limitations under the License.
14
13
 
15
- require "google/cloud/speech/v1beta1/speech_client"
14
+ require "google/cloud/speech/v1/speech_client"
@@ -0,0 +1,116 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # source: google/cloud/speech/v1/cloud_speech.proto
3
+
4
+ require 'google/protobuf'
5
+
6
+ require 'google/api/annotations_pb'
7
+ require 'google/longrunning/operations_pb'
8
+ require 'google/protobuf/any_pb'
9
+ require 'google/protobuf/duration_pb'
10
+ require 'google/protobuf/timestamp_pb'
11
+ require 'google/rpc/status_pb'
12
+ Google::Protobuf::DescriptorPool.generated_pool.build do
13
+ add_message "google.cloud.speech.v1.RecognizeRequest" do
14
+ optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
15
+ optional :audio, :message, 2, "google.cloud.speech.v1.RecognitionAudio"
16
+ end
17
+ add_message "google.cloud.speech.v1.LongRunningRecognizeRequest" do
18
+ optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
19
+ optional :audio, :message, 2, "google.cloud.speech.v1.RecognitionAudio"
20
+ end
21
+ add_message "google.cloud.speech.v1.StreamingRecognizeRequest" do
22
+ oneof :streaming_request do
23
+ optional :streaming_config, :message, 1, "google.cloud.speech.v1.StreamingRecognitionConfig"
24
+ optional :audio_content, :bytes, 2
25
+ end
26
+ end
27
+ add_message "google.cloud.speech.v1.StreamingRecognitionConfig" do
28
+ optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
29
+ optional :single_utterance, :bool, 2
30
+ optional :interim_results, :bool, 3
31
+ end
32
+ add_message "google.cloud.speech.v1.RecognitionConfig" do
33
+ optional :encoding, :enum, 1, "google.cloud.speech.v1.RecognitionConfig.AudioEncoding"
34
+ optional :sample_rate_hertz, :int32, 2
35
+ optional :language_code, :string, 3
36
+ optional :max_alternatives, :int32, 4
37
+ optional :profanity_filter, :bool, 5
38
+ repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
39
+ end
40
+ add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
41
+ value :ENCODING_UNSPECIFIED, 0
42
+ value :LINEAR16, 1
43
+ value :FLAC, 2
44
+ value :MULAW, 3
45
+ value :AMR, 4
46
+ value :AMR_WB, 5
47
+ value :OGG_OPUS, 6
48
+ value :SPEEX_WITH_HEADER_BYTE, 7
49
+ end
50
+ add_message "google.cloud.speech.v1.SpeechContext" do
51
+ repeated :phrases, :string, 1
52
+ end
53
+ add_message "google.cloud.speech.v1.RecognitionAudio" do
54
+ oneof :audio_source do
55
+ optional :content, :bytes, 1
56
+ optional :uri, :string, 2
57
+ end
58
+ end
59
+ add_message "google.cloud.speech.v1.RecognizeResponse" do
60
+ repeated :results, :message, 2, "google.cloud.speech.v1.SpeechRecognitionResult"
61
+ end
62
+ add_message "google.cloud.speech.v1.LongRunningRecognizeResponse" do
63
+ repeated :results, :message, 2, "google.cloud.speech.v1.SpeechRecognitionResult"
64
+ end
65
+ add_message "google.cloud.speech.v1.LongRunningRecognizeMetadata" do
66
+ optional :progress_percent, :int32, 1
67
+ optional :start_time, :message, 2, "google.protobuf.Timestamp"
68
+ optional :last_update_time, :message, 3, "google.protobuf.Timestamp"
69
+ end
70
+ add_message "google.cloud.speech.v1.StreamingRecognizeResponse" do
71
+ optional :error, :message, 1, "google.rpc.Status"
72
+ repeated :results, :message, 2, "google.cloud.speech.v1.StreamingRecognitionResult"
73
+ optional :speech_event_type, :enum, 4, "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType"
74
+ end
75
+ add_enum "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType" do
76
+ value :SPEECH_EVENT_UNSPECIFIED, 0
77
+ value :END_OF_SINGLE_UTTERANCE, 1
78
+ end
79
+ add_message "google.cloud.speech.v1.StreamingRecognitionResult" do
80
+ repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
81
+ optional :is_final, :bool, 2
82
+ optional :stability, :float, 3
83
+ end
84
+ add_message "google.cloud.speech.v1.SpeechRecognitionResult" do
85
+ repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
86
+ end
87
+ add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
88
+ optional :transcript, :string, 1
89
+ optional :confidence, :float, 2
90
+ end
91
+ end
92
+
93
+ module Google
94
+ module Cloud
95
+ module Speech
96
+ module V1
97
+ RecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognizeRequest").msgclass
98
+ LongRunningRecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.LongRunningRecognizeRequest").msgclass
99
+ StreamingRecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeRequest").msgclass
100
+ StreamingRecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
101
+ RecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
102
+ RecognitionConfig::AudioEncoding = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
103
+ SpeechContext = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechContext").msgclass
104
+ RecognitionAudio = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionAudio").msgclass
105
+ RecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognizeResponse").msgclass
106
+ LongRunningRecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.LongRunningRecognizeResponse").msgclass
107
+ LongRunningRecognizeMetadata = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.LongRunningRecognizeMetadata").msgclass
108
+ StreamingRecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeResponse").msgclass
109
+ StreamingRecognizeResponse::SpeechEventType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType").enummodule
110
+ StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
111
+ SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
112
+ SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
113
+ end
114
+ end
115
+ end
116
+ end
@@ -1,7 +1,7 @@
1
1
  # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # Source: google/cloud/speech/v1beta1/cloud_speech.proto for package 'google.cloud.speech.v1beta1'
2
+ # Source: google/cloud/speech/v1/cloud_speech.proto for package 'google.cloud.speech.v1'
3
3
  # Original file comments:
4
- # Copyright 2016 Google Inc.
4
+ # Copyright 2017 Google Inc.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,12 +17,12 @@
17
17
  #
18
18
 
19
19
  require 'grpc'
20
- require 'google/cloud/speech/v1beta1/cloud_speech_pb'
20
+ require 'google/cloud/speech/v1/cloud_speech_pb'
21
21
 
22
22
  module Google
23
23
  module Cloud
24
24
  module Speech
25
- module V1beta1
25
+ module V1
26
26
  module Speech
27
27
  # Service that implements Google Cloud Speech API.
28
28
  class Service
@@ -31,17 +31,17 @@ module Google
31
31
 
32
32
  self.marshal_class_method = :encode
33
33
  self.unmarshal_class_method = :decode
34
- self.service_name = 'google.cloud.speech.v1beta1.Speech'
34
+ self.service_name = 'google.cloud.speech.v1.Speech'
35
35
 
36
- # Perform synchronous speech-recognition: receive results after all audio
36
+ # Performs synchronous speech recognition: receive results after all audio
37
37
  # has been sent and processed.
38
- rpc :SyncRecognize, SyncRecognizeRequest, SyncRecognizeResponse
39
- # Perform asynchronous speech-recognition: receive results via the
38
+ rpc :Recognize, RecognizeRequest, RecognizeResponse
39
+ # Performs asynchronous speech recognition: receive results via the
40
40
  # google.longrunning.Operations interface. Returns either an
41
41
  # `Operation.error` or an `Operation.response` which contains
42
- # an `AsyncRecognizeResponse` message.
43
- rpc :AsyncRecognize, AsyncRecognizeRequest, Google::Longrunning::Operation
44
- # Perform bidirectional streaming speech-recognition: receive results while
42
+ # a `LongRunningRecognizeResponse` message.
43
+ rpc :LongRunningRecognize, LongRunningRecognizeRequest, Google::Longrunning::Operation
44
+ # Performs bidirectional streaming speech recognition: receive results while
45
45
  # sending audio. This method is only available via the gRPC API (not REST).
46
46
  rpc :StreamingRecognize, stream(StreamingRecognizeRequest), stream(StreamingRecognizeResponse)
47
47
  end
@@ -15,40 +15,37 @@
15
15
  module Google
16
16
  module Cloud
17
17
  module Speech
18
- module V1beta1
19
- # +SyncRecognizeRequest+ is the top-level message sent by the client for
20
- # the +SyncRecognize+ method.
18
+ module V1
19
+ # The top-level message sent by the client for the +Recognize+ method.
21
20
  # @!attribute [rw] config
22
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
23
- # [Required] The +config+ message provides information to the recognizer
24
- # that specifies how to process the request.
21
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig]
22
+ # *Required* Provides information to the recognizer that specifies how to
23
+ # process the request.
25
24
  # @!attribute [rw] audio
26
- # @return [Google::Cloud::Speech::V1beta1::RecognitionAudio]
27
- # [Required] The audio data to be recognized.
28
- class SyncRecognizeRequest; end
25
+ # @return [Google::Cloud::Speech::V1::RecognitionAudio]
26
+ # *Required* The audio data to be recognized.
27
+ class RecognizeRequest; end
29
28
 
30
- # +AsyncRecognizeRequest+ is the top-level message sent by the client for
31
- # the +AsyncRecognize+ method.
29
+ # The top-level message sent by the client for the +LongRunningRecognize+
30
+ # method.
32
31
  # @!attribute [rw] config
33
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
34
- # [Required] The +config+ message provides information to the recognizer
35
- # that specifies how to process the request.
32
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig]
33
+ # *Required* Provides information to the recognizer that specifies how to
34
+ # process the request.
36
35
  # @!attribute [rw] audio
37
- # @return [Google::Cloud::Speech::V1beta1::RecognitionAudio]
38
- # [Required] The audio data to be recognized.
39
- class AsyncRecognizeRequest; end
36
+ # @return [Google::Cloud::Speech::V1::RecognitionAudio]
37
+ # *Required* The audio data to be recognized.
38
+ class LongRunningRecognizeRequest; end
40
39
 
41
- # +StreamingRecognizeRequest+ is the top-level message sent by the client for
42
- # the +StreamingRecognize+. Multiple +StreamingRecognizeRequest+ messages are
43
- # sent. The first message must contain a +streaming_config+ message and must
44
- # not contain +audio+ data. All subsequent messages must contain +audio+ data
45
- # and must not contain a +streaming_config+ message.
40
+ # The top-level message sent by the client for the +StreamingRecognize+ method.
41
+ # Multiple +StreamingRecognizeRequest+ messages are sent. The first message
42
+ # must contain a +streaming_config+ message and must not contain +audio+ data.
43
+ # All subsequent messages must contain +audio+ data and must not contain a
44
+ # +streaming_config+ message.
46
45
  # @!attribute [rw] streaming_config
47
- # @return [Google::Cloud::Speech::V1beta1::StreamingRecognitionConfig]
48
- # The +streaming_config+ message provides information to the recognizer
49
- # that specifies how to process the request.
50
- #
51
- # The first +StreamingRecognizeRequest+ message must contain a
46
+ # @return [Google::Cloud::Speech::V1::StreamingRecognitionConfig]
47
+ # Provides information to the recognizer that specifies how to process the
48
+ # request. The first +StreamingRecognizeRequest+ message must contain a
52
49
  # +streaming_config+ message.
53
50
  # @!attribute [rw] audio_content
54
51
  # @return [String]
@@ -62,68 +59,69 @@ module Google
62
59
  # {audio limits}[https://cloud.google.com/speech/limits#content].
63
60
  class StreamingRecognizeRequest; end
64
61
 
65
- # The +StreamingRecognitionConfig+ message provides information to the
66
- # recognizer that specifies how to process the request.
62
+ # Provides information to the recognizer that specifies how to process the
63
+ # request.
67
64
  # @!attribute [rw] config
68
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
69
- # [Required] The +config+ message provides information to the recognizer
70
- # that specifies how to process the request.
65
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig]
66
+ # *Required* Provides information to the recognizer that specifies how to
67
+ # process the request.
71
68
  # @!attribute [rw] single_utterance
72
69
  # @return [true, false]
73
- # [Optional] If +false+ or omitted, the recognizer will perform continuous
74
- # recognition (continuing to process audio even if the user pauses speaking)
75
- # until the client closes the output stream (gRPC API) or when the maximum
76
- # time limit has been reached. Multiple +StreamingRecognitionResult+s with
77
- # the +is_final+ flag set to +true+ may be returned.
70
+ # *Optional* If +false+ or omitted, the recognizer will perform continuous
71
+ # recognition (continuing to wait for and process audio even if the user
72
+ # pauses speaking) until the client closes the input stream (gRPC API) or
73
+ # until the maximum time limit has been reached. May return multiple
74
+ # +StreamingRecognitionResult+s with the +is_final+ flag set to +true+.
78
75
  #
79
76
  # If +true+, the recognizer will detect a single spoken utterance. When it
80
77
  # detects that the user has paused or stopped speaking, it will return an
81
- # +END_OF_UTTERANCE+ event and cease recognition. It will return no more than
82
- # one +StreamingRecognitionResult+ with the +is_final+ flag set to +true+.
78
+ # +END_OF_SINGLE_UTTERANCE+ event and cease recognition. It will return no
79
+ # more than one +StreamingRecognitionResult+ with the +is_final+ flag set to
80
+ # +true+.
83
81
  # @!attribute [rw] interim_results
84
82
  # @return [true, false]
85
- # [Optional] If +true+, interim results (tentative hypotheses) may be
83
+ # *Optional* If +true+, interim results (tentative hypotheses) may be
86
84
  # returned as they become available (these interim results are indicated with
87
85
  # the +is_final=false+ flag).
88
86
  # If +false+ or omitted, only +is_final=true+ result(s) are returned.
89
87
  class StreamingRecognitionConfig; end
90
88
 
91
- # The +RecognitionConfig+ message provides information to the recognizer
92
- # that specifies how to process the request.
89
+ # Provides information to the recognizer that specifies how to process the
90
+ # request.
93
91
  # @!attribute [rw] encoding
94
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig::AudioEncoding]
95
- # [Required] Encoding of audio data sent in all +RecognitionAudio+ messages.
96
- # @!attribute [rw] sample_rate
92
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding]
93
+ # *Required* Encoding of audio data sent in all +RecognitionAudio+ messages.
94
+ # @!attribute [rw] sample_rate_hertz
97
95
  # @return [Integer]
98
- # [Required] Sample rate in Hertz of the audio data sent in all
96
+ # *Required* Sample rate in Hertz of the audio data sent in all
99
97
  # +RecognitionAudio+ messages. Valid values are: 8000-48000.
100
98
  # 16000 is optimal. For best results, set the sampling rate of the audio
101
99
  # source to 16000 Hz. If that's not possible, use the native sample rate of
102
100
  # the audio source (instead of re-sampling).
103
101
  # @!attribute [rw] language_code
104
102
  # @return [String]
105
- # [Optional] The language of the supplied audio as a BCP-47 language tag.
106
- # Example: "en-GB" https://www.rfc-editor.org/rfc/bcp/bcp47.txt
107
- # If omitted, defaults to "en-US". See
108
- # {Language Support}[https://cloud.google.com/speech/docs/best-practices#language_support]
103
+ # *Required* The language of the supplied audio as a
104
+ # {BCP-47}[https://www.rfc-editor.org/rfc/bcp/bcp47.txt] language tag.
105
+ # Example: "en-US".
106
+ # See {Language Support}[https://cloud.google.com/speech/docs/languages]
109
107
  # for a list of the currently supported language codes.
110
108
  # @!attribute [rw] max_alternatives
111
109
  # @return [Integer]
112
- # [Optional] Maximum number of recognition hypotheses to be returned.
110
+ # *Optional* Maximum number of recognition hypotheses to be returned.
113
111
  # Specifically, the maximum number of +SpeechRecognitionAlternative+ messages
114
112
  # within each +SpeechRecognitionResult+.
115
113
  # The server may return fewer than +max_alternatives+.
116
114
  # Valid values are +0+-+30+. A value of +0+ or +1+ will return a maximum of
117
- # +1+. If omitted, defaults to +1+.
115
+ # one. If omitted, will return a maximum of one.
118
116
  # @!attribute [rw] profanity_filter
119
117
  # @return [true, false]
120
- # [Optional] If set to +true+, the server will attempt to filter out
118
+ # *Optional* If set to +true+, the server will attempt to filter out
121
119
  # profanities, replacing all but the initial character in each filtered word
122
120
  # with asterisks, e.g. "f***". If set to +false+ or omitted, profanities
123
121
  # won't be filtered out.
124
- # @!attribute [rw] speech_context
125
- # @return [Google::Cloud::Speech::V1beta1::SpeechContext]
126
- # [Optional] A means to provide context to assist the speech recognition.
122
+ # @!attribute [rw] speech_contexts
123
+ # @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
124
+ # *Optional* A means to provide context to assist the speech recognition.
127
125
  class RecognitionConfig
128
126
  # Audio encoding of the data sent in the audio message. All encodings support
129
127
  # only 1 channel (mono) audio. Only +FLAC+ includes a header that describes
@@ -132,34 +130,52 @@ module Google
132
130
  #
133
131
  # For best results, the audio source should be captured and transmitted using
134
132
  # a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
135
- # reduced if lossy codecs (such as AMR, AMR_WB and MULAW) are used to capture
136
- # or transmit the audio, particularly if background noise is present.
133
+ # reduced if lossy codecs, which include the other codecs listed in
134
+ # this section, are used to capture or transmit the audio, particularly if
135
+ # background noise is present.
137
136
  module AudioEncoding
138
137
  # Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
139
138
  ENCODING_UNSPECIFIED = 0
140
139
 
141
140
  # Uncompressed 16-bit signed little-endian samples (Linear PCM).
142
- # This is the only encoding that may be used by +AsyncRecognize+.
143
141
  LINEAR16 = 1
144
142
 
145
- # This is the recommended encoding for +SyncRecognize+ and
146
- # +StreamingRecognize+ because it uses lossless compression; therefore
147
- # recognition accuracy is not compromised by a lossy codec.
148
- #
149
- # The stream FLAC (Free Lossless Audio Codec) encoding is specified at:
150
- # http://flac.sourceforge.net/documentation.html.
151
- # 16-bit and 24-bit samples are supported.
152
- # Not all fields in STREAMINFO are supported.
143
+ # {+FLAC+}[https://xiph.org/flac/documentation.html] (Free Lossless Audio
144
+ # Codec) is the recommended encoding because it is
145
+ # lossless--therefore recognition is not compromised--and
146
+ # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
147
+ # encoding supports 16-bit and 24-bit samples; however, not all fields in
148
+ # +STREAMINFO+ are supported.
153
149
  FLAC = 2
154
150
 
155
151
  # 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
156
152
  MULAW = 3
157
153
 
158
- # Adaptive Multi-Rate Narrowband codec. +sample_rate+ must be 8000 Hz.
154
+ # Adaptive Multi-Rate Narrowband codec. +sample_rate_hertz+ must be 8000.
159
155
  AMR = 4
160
156
 
161
- # Adaptive Multi-Rate Wideband codec. +sample_rate+ must be 16000 Hz.
157
+ # Adaptive Multi-Rate Wideband codec. +sample_rate_hertz+ must be 16000.
162
158
  AMR_WB = 5
159
+
160
+ # Opus-encoded audio frames in an Ogg container
161
+ # ({OggOpus}[https://wiki.xiph.org/OggOpus]).
162
+ # +sample_rate_hertz+ must be 16000.
163
+ OGG_OPUS = 6
164
+
165
+ # Although the use of lossy encodings is not recommended, if a very low
166
+ # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
167
+ # Speex encoding. The {Speex}[https://speex.org/] encoding supported by
168
+ # Cloud Speech API has a header byte in each block, as in MIME type
169
+ # +audio/x-speex-with-header-byte+.
170
+ # It is a variant of the RTP Speex encoding defined in
171
+ # {RFC 5574}[https://tools.ietf.org/html/rfc5574].
172
+ # The stream is a sequence of blocks, one block per RTP packet. Each block
173
+ # starts with a byte containing the length of the block, in bytes, followed
174
+ # by one or more frames of Speex data, padded to an integral number of
175
+ # bytes (octets) as specified in RFC 5574. In other words, each RTP header
176
+ # is replaced with a single byte containing the block length. Only Speex
177
+ # wideband is supported. +sample_rate_hertz+ must be 16000.
178
+ SPEEX_WITH_HEADER_BYTE = 7
163
179
  end
164
180
  end
165
181
 
@@ -167,7 +183,7 @@ module Google
167
183
  # in the results.
168
184
  # @!attribute [rw] phrases
169
185
  # @return [Array<String>]
170
- # [Optional] A list of strings containing words and phrases "hints" so that
186
+ # *Optional* A list of strings containing words and phrases "hints" so that
171
187
  # the speech recognition is more likely to recognize them. This can be used
172
188
  # to improve the accuracy for specific words and phrases, for example, if
173
189
  # specific commands are typically spoken by the user. This can also be used
@@ -194,30 +210,29 @@ module Google
194
210
  # {Request URIs}[https://cloud.google.com/storage/docs/reference-uris].
195
211
  class RecognitionAudio; end
196
212
 
197
- # +SyncRecognizeResponse+ is the only message returned to the client by
198
- # +SyncRecognize+. It contains the result as zero or more sequential
199
- # +SpeechRecognitionResult+ messages.
213
+ # The only message returned to the client by the +Recognize+ method. It
214
+ # contains the result as zero or more sequential +SpeechRecognitionResult+
215
+ # messages.
200
216
  # @!attribute [rw] results
201
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionResult>]
202
- # [Output-only] Sequential list of transcription results corresponding to
217
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionResult>]
218
+ # *Output-only* Sequential list of transcription results corresponding to
203
219
  # sequential portions of audio.
204
- class SyncRecognizeResponse; end
220
+ class RecognizeResponse; end
205
221
 
206
- # +AsyncRecognizeResponse+ is the only message returned to the client by
207
- # +AsyncRecognize+. It contains the result as zero or more sequential
208
- # +SpeechRecognitionResult+ messages. It is included in the +result.response+
209
- # field of the +Operation+ returned by the +GetOperation+ call of the
210
- # +google::longrunning::Operations+ service.
222
+ # The only message returned to the client by the +LongRunningRecognize+ method.
223
+ # It contains the result as zero or more sequential +SpeechRecognitionResult+
224
+ # messages. It is included in the +result.response+ field of the +Operation+
225
+ # returned by the +GetOperation+ call of the +google::longrunning::Operations+
226
+ # service.
211
227
  # @!attribute [rw] results
212
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionResult>]
213
- # [Output-only] Sequential list of transcription results corresponding to
228
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionResult>]
229
+ # *Output-only* Sequential list of transcription results corresponding to
214
230
  # sequential portions of audio.
215
- class AsyncRecognizeResponse; end
231
+ class LongRunningRecognizeResponse; end
216
232
 
217
- # +AsyncRecognizeMetadata+ describes the progress of a long-running
218
- # +AsyncRecognize+ call. It is included in the +metadata+ field of the
219
- # +Operation+ returned by the +GetOperation+ call of the
220
- # +google::longrunning::Operations+ service.
233
+ # Describes the progress of a long-running +LongRunningRecognize+ call. It is
234
+ # included in the +metadata+ field of the +Operation+ returned by the
235
+ # +GetOperation+ call of the +google::longrunning::Operations+ service.
221
236
  # @!attribute [rw] progress_percent
222
237
  # @return [Integer]
223
238
  # Approximate percentage of audio processed thus far. Guaranteed to be 100
@@ -228,7 +243,7 @@ module Google
228
243
  # @!attribute [rw] last_update_time
229
244
  # @return [Google::Protobuf::Timestamp]
230
245
  # Time of the most recent processing update.
231
- class AsyncRecognizeMetadata; end
246
+ class LongRunningRecognizeMetadata; end
232
247
 
233
248
  # +StreamingRecognizeResponse+ is the only message returned to the client by
234
249
  # +StreamingRecognize+. A series of one or more +StreamingRecognizeResponse+
@@ -237,139 +252,120 @@ module Google
237
252
  # Here's an example of a series of eight +StreamingRecognizeResponse+s that might
238
253
  # be returned while processing audio:
239
254
  #
240
- # 1. endpointer_type: START_OF_SPEECH
255
+ # 1. results { alternatives { transcript: "tube" } stability: 0.01 }
241
256
  #
242
- # 2. results { alternatives { transcript: "tube" } stability: 0.01 }
243
- # result_index: 0
257
+ # 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
244
258
  #
245
- # 3. results { alternatives { transcript: "to be a" } stability: 0.01 }
246
- # result_index: 0
247
- #
248
- # 4. results { alternatives { transcript: "to be" } stability: 0.9 }
259
+ # 3. results { alternatives { transcript: "to be" } stability: 0.9 }
249
260
  # results { alternatives { transcript: " or not to be" } stability: 0.01 }
250
- # result_index: 0
251
261
  #
252
- # 5. results { alternatives { transcript: "to be or not to be"
262
+ # 4. results { alternatives { transcript: "to be or not to be"
253
263
  # confidence: 0.92 }
254
264
  # alternatives { transcript: "to bee or not to bee" }
255
265
  # is_final: true }
256
- # result_index: 0
257
266
  #
258
- # 6. results { alternatives { transcript: " that's" } stability: 0.01 }
259
- # result_index: 1
267
+ # 5. results { alternatives { transcript: " that's" } stability: 0.01 }
260
268
  #
261
- # 7. results { alternatives { transcript: " that is" } stability: 0.9 }
269
+ # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
262
270
  # results { alternatives { transcript: " the question" } stability: 0.01 }
263
- # result_index: 1
264
271
  #
265
- # 8. endpointer_type: END_OF_SPEECH
272
+ # 7. speech_event_type: END_OF_SINGLE_UTTERANCE
266
273
  #
267
- # 9. results { alternatives { transcript: " that is the question"
274
+ # 8. results { alternatives { transcript: " that is the question"
268
275
  # confidence: 0.98 }
269
276
  # alternatives { transcript: " that was the question" }
270
277
  # is_final: true }
271
- # result_index: 1
272
- #
273
- # 10. endpointer_type: END_OF_AUDIO
274
278
  #
275
279
  # Notes:
276
280
  #
277
- # - Only two of the above responses #5 and #9 contain final results, they are
281
+ # - Only two of the above responses, #4 and #8, contain final results; they are
278
282
  # indicated by +is_final: true+. Concatenating these together generates the
279
283
  # full transcript: "to be or not to be that is the question".
280
284
  #
281
- # - The others contain interim +results+. #4 and #7 contain two interim
282
- # +results+, the first portion has a high stability and is less likely to
283
- # change, the second portion has a low stability and is very likely to
285
+ # - The others contain interim +results+. #3 and #6 contain two interim
286
+ # +results+: the first portion has a high stability and is less likely to
287
+ # change; the second portion has a low stability and is very likely to
284
288
  # change. A UI designer might choose to show only high stability +results+.
285
289
  #
286
- # - The +result_index+ indicates the portion of audio that has had final
287
- # results returned, and is no longer being processed. For example, the
288
- # +results+ in #6 and later correspond to the portion of audio after
289
- # "to be or not to be".
290
+ # - The specific +stability+ and +confidence+ values shown above are only for
291
+ # illustrative purposes. Actual values may vary.
292
+ #
293
+ # - In each response, only one of these fields will be set:
294
+ # +error+,
295
+ # +speech_event_type+, or
296
+ # one or more (repeated) +results+.
290
297
  # @!attribute [rw] error
291
298
  # @return [Google::Rpc::Status]
292
- # [Output-only] If set, returns a Google::Rpc::Status message that
299
+ # *Output-only* If set, returns a Google::Rpc::Status message that
293
300
  # specifies the error for the operation.
294
301
  # @!attribute [rw] results
295
- # @return [Array<Google::Cloud::Speech::V1beta1::StreamingRecognitionResult>]
296
- # [Output-only] This repeated list contains zero or more results that
302
+ # @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
303
+ # *Output-only* This repeated list contains zero or more results that
297
304
  # correspond to consecutive portions of the audio currently being processed.
298
305
  # It contains zero or one +is_final=true+ result (the newly settled portion),
299
306
  # followed by zero or more +is_final=false+ results.
300
- # @!attribute [rw] result_index
301
- # @return [Integer]
302
- # [Output-only] Indicates the lowest index in the +results+ array that has
303
- # changed. The repeated +StreamingRecognitionResult+ results overwrite past
304
- # results at this index and higher.
305
- # @!attribute [rw] endpointer_type
306
- # @return [Google::Cloud::Speech::V1beta1::StreamingRecognizeResponse::EndpointerType]
307
- # [Output-only] Indicates the type of endpointer event.
307
+ # @!attribute [rw] speech_event_type
308
+ # @return [Google::Cloud::Speech::V1::StreamingRecognizeResponse::SpeechEventType]
309
+ # *Output-only* Indicates the type of speech event.
308
310
  class StreamingRecognizeResponse
309
- # Indicates the type of endpointer event.
310
- module EndpointerType
311
- # No endpointer event specified.
312
- ENDPOINTER_EVENT_UNSPECIFIED = 0
313
-
314
- # Speech has been detected in the audio stream.
315
- START_OF_SPEECH = 1
316
-
317
- # Speech has ceased to be detected in the audio stream.
318
- END_OF_SPEECH = 2
319
-
320
- # The end of the audio stream has been reached. and it is being processed.
321
- END_OF_AUDIO = 3
311
+ # Indicates the type of speech event.
312
+ module SpeechEventType
313
+ # No speech event specified.
314
+ SPEECH_EVENT_UNSPECIFIED = 0
322
315
 
323
- # This event is only sent when +single_utterance+ is +true+. It indicates
324
- # that the server has detected the end of the user's speech utterance and
325
- # expects no additional speech. Therefore, the server will not process
326
- # additional audio. The client should stop sending additional audio data.
327
- END_OF_UTTERANCE = 4
316
+ # This event indicates that the server has detected the end of the user's
317
+ # speech utterance and expects no additional speech. Therefore, the server
318
+ # will not process additional audio (although it may subsequently return
319
+ # additional results). The client should stop sending additional audio
320
+ # data, half-close the gRPC connection, and wait for any additional results
321
+ # until the server closes the gRPC connection. This event is only sent if
322
+ # +single_utterance+ was set to +true+, and is not used otherwise.
323
+ END_OF_SINGLE_UTTERANCE = 1
328
324
  end
329
325
  end
330
326
 
331
327
  # A streaming speech recognition result corresponding to a portion of the audio
332
328
  # that is currently being processed.
333
329
  # @!attribute [rw] alternatives
334
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionAlternative>]
335
- # [Output-only] May contain one or more recognition hypotheses (up to the
330
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
331
+ # *Output-only* May contain one or more recognition hypotheses (up to the
336
332
  # maximum specified in +max_alternatives+).
337
333
  # @!attribute [rw] is_final
338
334
  # @return [true, false]
339
- # [Output-only] If +false+, this +StreamingRecognitionResult+ represents an
335
+ # *Output-only* If +false+, this +StreamingRecognitionResult+ represents an
340
336
  # interim result that may change. If +true+, this is the final time the
341
337
  # speech service will return this particular +StreamingRecognitionResult+;
342
338
  # the recognizer will not return any further hypotheses for this portion of
343
339
  # the transcript and corresponding audio.
344
340
  # @!attribute [rw] stability
345
341
  # @return [Float]
346
- # [Output-only] An estimate of the probability that the recognizer will not
342
+ # *Output-only* An estimate of the likelihood that the recognizer will not
347
343
  # change its guess about this interim result. Values range from 0.0
348
- # (completely unstable) to 1.0 (completely stable). Note that this is not the
349
- # same as +confidence+, which estimates the probability that a recognition
350
- # result is correct.
344
+ # (completely unstable) to 1.0 (completely stable).
351
345
  # This field is only provided for interim results (+is_final=false+).
352
- # The default of 0.0 is a sentinel value indicating stability was not set.
346
+ # The default of 0.0 is a sentinel value indicating +stability+ was not set.
353
347
  class StreamingRecognitionResult; end
354
348
 
355
349
  # A speech recognition result corresponding to a portion of the audio.
356
350
  # @!attribute [rw] alternatives
357
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionAlternative>]
358
- # [Output-only] May contain one or more recognition hypotheses (up to the
351
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
352
+ # *Output-only* May contain one or more recognition hypotheses (up to the
359
353
  # maximum specified in +max_alternatives+).
360
354
  class SpeechRecognitionResult; end
361
355
 
362
356
  # Alternative hypotheses (a.k.a. n-best list).
363
357
  # @!attribute [rw] transcript
364
358
  # @return [String]
365
- # [Output-only] Transcript text representing the words that the user spoke.
359
+ # *Output-only* Transcript text representing the words that the user spoke.
366
360
  # @!attribute [rw] confidence
367
361
  # @return [Float]
368
- # [Output-only] The confidence estimate between 0.0 and 1.0. A higher number
369
- # means the system is more confident that the recognition is correct.
370
- # This field is typically provided only for the top hypothesis, and only for
371
- # +is_final=true+ results.
372
- # The default of 0.0 is a sentinel value indicating confidence was not set.
362
+ # *Output-only* The confidence estimate between 0.0 and 1.0. A higher number
363
+ # indicates an estimated greater likelihood that the recognized words are
364
+ # correct. This field is typically provided only for the top hypothesis, and
365
+ # only for +is_final=true+ results. Clients should not rely on the
366
+ # +confidence+ field as it is not guaranteed to be accurate, or even set, in
367
+ # any of the results.
368
+ # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
373
369
  class SpeechRecognitionAlternative; end
374
370
  end
375
371
  end