google-cloud-speech 0.23.0 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,4 @@
1
- # Copyright 2016 Google Inc. All rights reserved.
2
- #
1
+ # Copyright 2017, Google Inc. All rights reserved.
3
2
  # Licensed under the Apache License, Version 2.0 (the "License");
4
3
  # you may not use this file except in compliance with the License.
5
4
  # You may obtain a copy of the License at
@@ -12,4 +11,4 @@
12
11
  # See the License for the specific language governing permissions and
13
12
  # limitations under the License.
14
13
 
15
- require "google/cloud/speech/v1beta1/speech_client"
14
+ require "google/cloud/speech/v1/speech_client"
@@ -0,0 +1,116 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # source: google/cloud/speech/v1/cloud_speech.proto
3
+
4
+ require 'google/protobuf'
5
+
6
+ require 'google/api/annotations_pb'
7
+ require 'google/longrunning/operations_pb'
8
+ require 'google/protobuf/any_pb'
9
+ require 'google/protobuf/duration_pb'
10
+ require 'google/protobuf/timestamp_pb'
11
+ require 'google/rpc/status_pb'
12
+ Google::Protobuf::DescriptorPool.generated_pool.build do
13
+ add_message "google.cloud.speech.v1.RecognizeRequest" do
14
+ optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
15
+ optional :audio, :message, 2, "google.cloud.speech.v1.RecognitionAudio"
16
+ end
17
+ add_message "google.cloud.speech.v1.LongRunningRecognizeRequest" do
18
+ optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
19
+ optional :audio, :message, 2, "google.cloud.speech.v1.RecognitionAudio"
20
+ end
21
+ add_message "google.cloud.speech.v1.StreamingRecognizeRequest" do
22
+ oneof :streaming_request do
23
+ optional :streaming_config, :message, 1, "google.cloud.speech.v1.StreamingRecognitionConfig"
24
+ optional :audio_content, :bytes, 2
25
+ end
26
+ end
27
+ add_message "google.cloud.speech.v1.StreamingRecognitionConfig" do
28
+ optional :config, :message, 1, "google.cloud.speech.v1.RecognitionConfig"
29
+ optional :single_utterance, :bool, 2
30
+ optional :interim_results, :bool, 3
31
+ end
32
+ add_message "google.cloud.speech.v1.RecognitionConfig" do
33
+ optional :encoding, :enum, 1, "google.cloud.speech.v1.RecognitionConfig.AudioEncoding"
34
+ optional :sample_rate_hertz, :int32, 2
35
+ optional :language_code, :string, 3
36
+ optional :max_alternatives, :int32, 4
37
+ optional :profanity_filter, :bool, 5
38
+ repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
39
+ end
40
+ add_enum "google.cloud.speech.v1.RecognitionConfig.AudioEncoding" do
41
+ value :ENCODING_UNSPECIFIED, 0
42
+ value :LINEAR16, 1
43
+ value :FLAC, 2
44
+ value :MULAW, 3
45
+ value :AMR, 4
46
+ value :AMR_WB, 5
47
+ value :OGG_OPUS, 6
48
+ value :SPEEX_WITH_HEADER_BYTE, 7
49
+ end
50
+ add_message "google.cloud.speech.v1.SpeechContext" do
51
+ repeated :phrases, :string, 1
52
+ end
53
+ add_message "google.cloud.speech.v1.RecognitionAudio" do
54
+ oneof :audio_source do
55
+ optional :content, :bytes, 1
56
+ optional :uri, :string, 2
57
+ end
58
+ end
59
+ add_message "google.cloud.speech.v1.RecognizeResponse" do
60
+ repeated :results, :message, 2, "google.cloud.speech.v1.SpeechRecognitionResult"
61
+ end
62
+ add_message "google.cloud.speech.v1.LongRunningRecognizeResponse" do
63
+ repeated :results, :message, 2, "google.cloud.speech.v1.SpeechRecognitionResult"
64
+ end
65
+ add_message "google.cloud.speech.v1.LongRunningRecognizeMetadata" do
66
+ optional :progress_percent, :int32, 1
67
+ optional :start_time, :message, 2, "google.protobuf.Timestamp"
68
+ optional :last_update_time, :message, 3, "google.protobuf.Timestamp"
69
+ end
70
+ add_message "google.cloud.speech.v1.StreamingRecognizeResponse" do
71
+ optional :error, :message, 1, "google.rpc.Status"
72
+ repeated :results, :message, 2, "google.cloud.speech.v1.StreamingRecognitionResult"
73
+ optional :speech_event_type, :enum, 4, "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType"
74
+ end
75
+ add_enum "google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType" do
76
+ value :SPEECH_EVENT_UNSPECIFIED, 0
77
+ value :END_OF_SINGLE_UTTERANCE, 1
78
+ end
79
+ add_message "google.cloud.speech.v1.StreamingRecognitionResult" do
80
+ repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
81
+ optional :is_final, :bool, 2
82
+ optional :stability, :float, 3
83
+ end
84
+ add_message "google.cloud.speech.v1.SpeechRecognitionResult" do
85
+ repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
86
+ end
87
+ add_message "google.cloud.speech.v1.SpeechRecognitionAlternative" do
88
+ optional :transcript, :string, 1
89
+ optional :confidence, :float, 2
90
+ end
91
+ end
92
+
93
+ module Google
94
+ module Cloud
95
+ module Speech
96
+ module V1
97
+ RecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognizeRequest").msgclass
98
+ LongRunningRecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.LongRunningRecognizeRequest").msgclass
99
+ StreamingRecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeRequest").msgclass
100
+ StreamingRecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
101
+ RecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
102
+ RecognitionConfig::AudioEncoding = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
103
+ SpeechContext = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechContext").msgclass
104
+ RecognitionAudio = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionAudio").msgclass
105
+ RecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognizeResponse").msgclass
106
+ LongRunningRecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.LongRunningRecognizeResponse").msgclass
107
+ LongRunningRecognizeMetadata = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.LongRunningRecognizeMetadata").msgclass
108
+ StreamingRecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeResponse").msgclass
109
+ StreamingRecognizeResponse::SpeechEventType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognizeResponse.SpeechEventType").enummodule
110
+ StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionResult").msgclass
111
+ SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionResult").msgclass
112
+ SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechRecognitionAlternative").msgclass
113
+ end
114
+ end
115
+ end
116
+ end
@@ -1,7 +1,7 @@
1
1
  # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # Source: google/cloud/speech/v1beta1/cloud_speech.proto for package 'google.cloud.speech.v1beta1'
2
+ # Source: google/cloud/speech/v1/cloud_speech.proto for package 'google.cloud.speech.v1'
3
3
  # Original file comments:
4
- # Copyright 2016 Google Inc.
4
+ # Copyright 2017 Google Inc.
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,12 +17,12 @@
17
17
  #
18
18
 
19
19
  require 'grpc'
20
- require 'google/cloud/speech/v1beta1/cloud_speech_pb'
20
+ require 'google/cloud/speech/v1/cloud_speech_pb'
21
21
 
22
22
  module Google
23
23
  module Cloud
24
24
  module Speech
25
- module V1beta1
25
+ module V1
26
26
  module Speech
27
27
  # Service that implements Google Cloud Speech API.
28
28
  class Service
@@ -31,17 +31,17 @@ module Google
31
31
 
32
32
  self.marshal_class_method = :encode
33
33
  self.unmarshal_class_method = :decode
34
- self.service_name = 'google.cloud.speech.v1beta1.Speech'
34
+ self.service_name = 'google.cloud.speech.v1.Speech'
35
35
 
36
- # Perform synchronous speech-recognition: receive results after all audio
36
+ # Performs synchronous speech recognition: receive results after all audio
37
37
  # has been sent and processed.
38
- rpc :SyncRecognize, SyncRecognizeRequest, SyncRecognizeResponse
39
- # Perform asynchronous speech-recognition: receive results via the
38
+ rpc :Recognize, RecognizeRequest, RecognizeResponse
39
+ # Performs asynchronous speech recognition: receive results via the
40
40
  # google.longrunning.Operations interface. Returns either an
41
41
  # `Operation.error` or an `Operation.response` which contains
42
- # an `AsyncRecognizeResponse` message.
43
- rpc :AsyncRecognize, AsyncRecognizeRequest, Google::Longrunning::Operation
44
- # Perform bidirectional streaming speech-recognition: receive results while
42
+ # a `LongRunningRecognizeResponse` message.
43
+ rpc :LongRunningRecognize, LongRunningRecognizeRequest, Google::Longrunning::Operation
44
+ # Performs bidirectional streaming speech recognition: receive results while
45
45
  # sending audio. This method is only available via the gRPC API (not REST).
46
46
  rpc :StreamingRecognize, stream(StreamingRecognizeRequest), stream(StreamingRecognizeResponse)
47
47
  end
@@ -15,40 +15,37 @@
15
15
  module Google
16
16
  module Cloud
17
17
  module Speech
18
- module V1beta1
19
- # +SyncRecognizeRequest+ is the top-level message sent by the client for
20
- # the +SyncRecognize+ method.
18
+ module V1
19
+ # The top-level message sent by the client for the +Recognize+ method.
21
20
  # @!attribute [rw] config
22
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
23
- # [Required] The +config+ message provides information to the recognizer
24
- # that specifies how to process the request.
21
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig]
22
+ # *Required* Provides information to the recognizer that specifies how to
23
+ # process the request.
25
24
  # @!attribute [rw] audio
26
- # @return [Google::Cloud::Speech::V1beta1::RecognitionAudio]
27
- # [Required] The audio data to be recognized.
28
- class SyncRecognizeRequest; end
25
+ # @return [Google::Cloud::Speech::V1::RecognitionAudio]
26
+ # *Required* The audio data to be recognized.
27
+ class RecognizeRequest; end
29
28
 
30
- # +AsyncRecognizeRequest+ is the top-level message sent by the client for
31
- # the +AsyncRecognize+ method.
29
+ # The top-level message sent by the client for the +LongRunningRecognize+
30
+ # method.
32
31
  # @!attribute [rw] config
33
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
34
- # [Required] The +config+ message provides information to the recognizer
35
- # that specifies how to process the request.
32
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig]
33
+ # *Required* Provides information to the recognizer that specifies how to
34
+ # process the request.
36
35
  # @!attribute [rw] audio
37
- # @return [Google::Cloud::Speech::V1beta1::RecognitionAudio]
38
- # [Required] The audio data to be recognized.
39
- class AsyncRecognizeRequest; end
36
+ # @return [Google::Cloud::Speech::V1::RecognitionAudio]
37
+ # *Required* The audio data to be recognized.
38
+ class LongRunningRecognizeRequest; end
40
39
 
41
- # +StreamingRecognizeRequest+ is the top-level message sent by the client for
42
- # the +StreamingRecognize+. Multiple +StreamingRecognizeRequest+ messages are
43
- # sent. The first message must contain a +streaming_config+ message and must
44
- # not contain +audio+ data. All subsequent messages must contain +audio+ data
45
- # and must not contain a +streaming_config+ message.
40
+ # The top-level message sent by the client for the +StreamingRecognize+ method.
41
+ # Multiple +StreamingRecognizeRequest+ messages are sent. The first message
42
+ # must contain a +streaming_config+ message and must not contain +audio_content+ data.
43
+ # All subsequent messages must contain +audio_content+ data and must not contain a
44
+ # +streaming_config+ message.
46
45
  # @!attribute [rw] streaming_config
47
- # @return [Google::Cloud::Speech::V1beta1::StreamingRecognitionConfig]
48
- # The +streaming_config+ message provides information to the recognizer
49
- # that specifies how to process the request.
50
- #
51
- # The first +StreamingRecognizeRequest+ message must contain a
46
+ # @return [Google::Cloud::Speech::V1::StreamingRecognitionConfig]
47
+ # Provides information to the recognizer that specifies how to process the
48
+ # request. The first +StreamingRecognizeRequest+ message must contain a
52
49
  # +streaming_config+ message.
53
50
  # @!attribute [rw] audio_content
54
51
  # @return [String]
@@ -62,68 +59,69 @@ module Google
62
59
  # {audio limits}[https://cloud.google.com/speech/limits#content].
63
60
  class StreamingRecognizeRequest; end
64
61
 
65
- # The +StreamingRecognitionConfig+ message provides information to the
66
- # recognizer that specifies how to process the request.
62
+ # Provides information to the recognizer that specifies how to process the
63
+ # request.
67
64
  # @!attribute [rw] config
68
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
69
- # [Required] The +config+ message provides information to the recognizer
70
- # that specifies how to process the request.
65
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig]
66
+ # *Required* Provides information to the recognizer that specifies how to
67
+ # process the request.
71
68
  # @!attribute [rw] single_utterance
72
69
  # @return [true, false]
73
- # [Optional] If +false+ or omitted, the recognizer will perform continuous
74
- # recognition (continuing to process audio even if the user pauses speaking)
75
- # until the client closes the output stream (gRPC API) or when the maximum
76
- # time limit has been reached. Multiple +StreamingRecognitionResult+s with
77
- # the +is_final+ flag set to +true+ may be returned.
70
+ # *Optional* If +false+ or omitted, the recognizer will perform continuous
71
+ # recognition (continuing to wait for and process audio even if the user
72
+ # pauses speaking) until the client closes the input stream (gRPC API) or
73
+ # until the maximum time limit has been reached. May return multiple
74
+ # +StreamingRecognitionResult+s with the +is_final+ flag set to +true+.
78
75
  #
79
76
  # If +true+, the recognizer will detect a single spoken utterance. When it
80
77
  # detects that the user has paused or stopped speaking, it will return an
81
- # +END_OF_UTTERANCE+ event and cease recognition. It will return no more than
82
- # one +StreamingRecognitionResult+ with the +is_final+ flag set to +true+.
78
+ # +END_OF_SINGLE_UTTERANCE+ event and cease recognition. It will return no
79
+ # more than one +StreamingRecognitionResult+ with the +is_final+ flag set to
80
+ # +true+.
83
81
  # @!attribute [rw] interim_results
84
82
  # @return [true, false]
85
- # [Optional] If +true+, interim results (tentative hypotheses) may be
83
+ # *Optional* If +true+, interim results (tentative hypotheses) may be
86
84
  # returned as they become available (these interim results are indicated with
87
85
  # the +is_final=false+ flag).
88
86
  # If +false+ or omitted, only +is_final=true+ result(s) are returned.
89
87
  class StreamingRecognitionConfig; end
90
88
 
91
- # The +RecognitionConfig+ message provides information to the recognizer
92
- # that specifies how to process the request.
89
+ # Provides information to the recognizer that specifies how to process the
90
+ # request.
93
91
  # @!attribute [rw] encoding
94
- # @return [Google::Cloud::Speech::V1beta1::RecognitionConfig::AudioEncoding]
95
- # [Required] Encoding of audio data sent in all +RecognitionAudio+ messages.
96
- # @!attribute [rw] sample_rate
92
+ # @return [Google::Cloud::Speech::V1::RecognitionConfig::AudioEncoding]
93
+ # *Required* Encoding of audio data sent in all +RecognitionAudio+ messages.
94
+ # @!attribute [rw] sample_rate_hertz
97
95
  # @return [Integer]
98
- # [Required] Sample rate in Hertz of the audio data sent in all
96
+ # *Required* Sample rate in Hertz of the audio data sent in all
99
97
  # +RecognitionAudio+ messages. Valid values are: 8000-48000.
100
98
  # 16000 is optimal. For best results, set the sampling rate of the audio
101
99
  # source to 16000 Hz. If that's not possible, use the native sample rate of
102
100
  # the audio source (instead of re-sampling).
103
101
  # @!attribute [rw] language_code
104
102
  # @return [String]
105
- # [Optional] The language of the supplied audio as a BCP-47 language tag.
106
- # Example: "en-GB" https://www.rfc-editor.org/rfc/bcp/bcp47.txt
107
- # If omitted, defaults to "en-US". See
108
- # {Language Support}[https://cloud.google.com/speech/docs/best-practices#language_support]
103
+ # *Required* The language of the supplied audio as a
104
+ # {BCP-47}[https://www.rfc-editor.org/rfc/bcp/bcp47.txt] language tag.
105
+ # Example: "en-US".
106
+ # See {Language Support}[https://cloud.google.com/speech/docs/languages]
109
107
  # for a list of the currently supported language codes.
110
108
  # @!attribute [rw] max_alternatives
111
109
  # @return [Integer]
112
- # [Optional] Maximum number of recognition hypotheses to be returned.
110
+ # *Optional* Maximum number of recognition hypotheses to be returned.
113
111
  # Specifically, the maximum number of +SpeechRecognitionAlternative+ messages
114
112
  # within each +SpeechRecognitionResult+.
115
113
  # The server may return fewer than +max_alternatives+.
116
114
  # Valid values are +0+-+30+. A value of +0+ or +1+ will return a maximum of
117
- # +1+. If omitted, defaults to +1+.
115
+ # one. If omitted, will return a maximum of one.
118
116
  # @!attribute [rw] profanity_filter
119
117
  # @return [true, false]
120
- # [Optional] If set to +true+, the server will attempt to filter out
118
+ # *Optional* If set to +true+, the server will attempt to filter out
121
119
  # profanities, replacing all but the initial character in each filtered word
122
120
  # with asterisks, e.g. "f***". If set to +false+ or omitted, profanities
123
121
  # won't be filtered out.
124
- # @!attribute [rw] speech_context
125
- # @return [Google::Cloud::Speech::V1beta1::SpeechContext]
126
- # [Optional] A means to provide context to assist the speech recognition.
122
+ # @!attribute [rw] speech_contexts
123
+ # @return [Array<Google::Cloud::Speech::V1::SpeechContext>]
124
+ # *Optional* A means to provide context to assist the speech recognition.
127
125
  class RecognitionConfig
128
126
  # Audio encoding of the data sent in the audio message. All encodings support
129
127
  # only 1 channel (mono) audio. Only +FLAC+ includes a header that describes
@@ -132,34 +130,52 @@ module Google
132
130
  #
133
131
  # For best results, the audio source should be captured and transmitted using
134
132
  # a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
135
- # reduced if lossy codecs (such as AMR, AMR_WB and MULAW) are used to capture
136
- # or transmit the audio, particularly if background noise is present.
133
+ # reduced if lossy codecs, which include the other codecs listed in
134
+ # this section, are used to capture or transmit the audio, particularly if
135
+ # background noise is present.
137
136
  module AudioEncoding
138
137
  # Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
139
138
  ENCODING_UNSPECIFIED = 0
140
139
 
141
140
  # Uncompressed 16-bit signed little-endian samples (Linear PCM).
142
- # This is the only encoding that may be used by +AsyncRecognize+.
143
141
  LINEAR16 = 1
144
142
 
145
- # This is the recommended encoding for +SyncRecognize+ and
146
- # +StreamingRecognize+ because it uses lossless compression; therefore
147
- # recognition accuracy is not compromised by a lossy codec.
148
- #
149
- # The stream FLAC (Free Lossless Audio Codec) encoding is specified at:
150
- # http://flac.sourceforge.net/documentation.html.
151
- # 16-bit and 24-bit samples are supported.
152
- # Not all fields in STREAMINFO are supported.
143
+ # {+FLAC+}[https://xiph.org/flac/documentation.html] (Free Lossless Audio
144
+ # Codec) is the recommended encoding because it is
145
+ # lossless--therefore recognition is not compromised--and
146
+ # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
147
+ # encoding supports 16-bit and 24-bit samples; however, not all fields in
148
+ # +STREAMINFO+ are supported.
153
149
  FLAC = 2
154
150
 
155
151
  # 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
156
152
  MULAW = 3
157
153
 
158
- # Adaptive Multi-Rate Narrowband codec. +sample_rate+ must be 8000 Hz.
154
+ # Adaptive Multi-Rate Narrowband codec. +sample_rate_hertz+ must be 8000.
159
155
  AMR = 4
160
156
 
161
- # Adaptive Multi-Rate Wideband codec. +sample_rate+ must be 16000 Hz.
157
+ # Adaptive Multi-Rate Wideband codec. +sample_rate_hertz+ must be 16000.
162
158
  AMR_WB = 5
159
+
160
+ # Opus-encoded audio frames in an Ogg container
161
+ # ({OggOpus}[https://wiki.xiph.org/OggOpus]).
162
+ # +sample_rate_hertz+ must be 16000.
163
+ OGG_OPUS = 6
164
+
165
+ # Although the use of lossy encodings is not recommended, if a very low
166
+ # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
167
+ # Speex encoding. The {Speex}[https://speex.org/] encoding supported by
168
+ # Cloud Speech API has a header byte in each block, as in MIME type
169
+ # +audio/x-speex-with-header-byte+.
170
+ # It is a variant of the RTP Speex encoding defined in
171
+ # {RFC 5574}[https://tools.ietf.org/html/rfc5574].
172
+ # The stream is a sequence of blocks, one block per RTP packet. Each block
173
+ # starts with a byte containing the length of the block, in bytes, followed
174
+ # by one or more frames of Speex data, padded to an integral number of
175
+ # bytes (octets) as specified in RFC 5574. In other words, each RTP header
176
+ # is replaced with a single byte containing the block length. Only Speex
177
+ # wideband is supported. +sample_rate_hertz+ must be 16000.
178
+ SPEEX_WITH_HEADER_BYTE = 7
163
179
  end
164
180
  end
165
181
 
@@ -167,7 +183,7 @@ module Google
167
183
  # in the results.
168
184
  # @!attribute [rw] phrases
169
185
  # @return [Array<String>]
170
- # [Optional] A list of strings containing words and phrases "hints" so that
186
+ # *Optional* A list of strings containing words and phrases "hints" so that
171
187
  # the speech recognition is more likely to recognize them. This can be used
172
188
  # to improve the accuracy for specific words and phrases, for example, if
173
189
  # specific commands are typically spoken by the user. This can also be used
@@ -194,30 +210,29 @@ module Google
194
210
  # {Request URIs}[https://cloud.google.com/storage/docs/reference-uris].
195
211
  class RecognitionAudio; end
196
212
 
197
- # +SyncRecognizeResponse+ is the only message returned to the client by
198
- # +SyncRecognize+. It contains the result as zero or more sequential
199
- # +SpeechRecognitionResult+ messages.
213
+ # The only message returned to the client by the +Recognize+ method. It
214
+ # contains the result as zero or more sequential +SpeechRecognitionResult+
215
+ # messages.
200
216
  # @!attribute [rw] results
201
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionResult>]
202
- # [Output-only] Sequential list of transcription results corresponding to
217
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionResult>]
218
+ # *Output-only* Sequential list of transcription results corresponding to
203
219
  # sequential portions of audio.
204
- class SyncRecognizeResponse; end
220
+ class RecognizeResponse; end
205
221
 
206
- # +AsyncRecognizeResponse+ is the only message returned to the client by
207
- # +AsyncRecognize+. It contains the result as zero or more sequential
208
- # +SpeechRecognitionResult+ messages. It is included in the +result.response+
209
- # field of the +Operation+ returned by the +GetOperation+ call of the
210
- # +google::longrunning::Operations+ service.
222
+ # The only message returned to the client by the +LongRunningRecognize+ method.
223
+ # It contains the result as zero or more sequential +SpeechRecognitionResult+
224
+ # messages. It is included in the +result.response+ field of the +Operation+
225
+ # returned by the +GetOperation+ call of the +google::longrunning::Operations+
226
+ # service.
211
227
  # @!attribute [rw] results
212
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionResult>]
213
- # [Output-only] Sequential list of transcription results corresponding to
228
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionResult>]
229
+ # *Output-only* Sequential list of transcription results corresponding to
214
230
  # sequential portions of audio.
215
- class AsyncRecognizeResponse; end
231
+ class LongRunningRecognizeResponse; end
216
232
 
217
- # +AsyncRecognizeMetadata+ describes the progress of a long-running
218
- # +AsyncRecognize+ call. It is included in the +metadata+ field of the
219
- # +Operation+ returned by the +GetOperation+ call of the
220
- # +google::longrunning::Operations+ service.
233
+ # Describes the progress of a long-running +LongRunningRecognize+ call. It is
234
+ # included in the +metadata+ field of the +Operation+ returned by the
235
+ # +GetOperation+ call of the +google::longrunning::Operations+ service.
221
236
  # @!attribute [rw] progress_percent
222
237
  # @return [Integer]
223
238
  # Approximate percentage of audio processed thus far. Guaranteed to be 100
@@ -228,7 +243,7 @@ module Google
228
243
  # @!attribute [rw] last_update_time
229
244
  # @return [Google::Protobuf::Timestamp]
230
245
  # Time of the most recent processing update.
231
- class AsyncRecognizeMetadata; end
246
+ class LongRunningRecognizeMetadata; end
232
247
 
233
248
  # +StreamingRecognizeResponse+ is the only message returned to the client by
234
249
  # +StreamingRecognize+. A series of one or more +StreamingRecognizeResponse+
@@ -237,139 +252,120 @@ module Google
237
252
  # Here's an example of a series of eight +StreamingRecognizeResponse+s that might
238
253
  # be returned while processing audio:
239
254
  #
240
- # 1. endpointer_type: START_OF_SPEECH
255
+ # 1. results { alternatives { transcript: "tube" } stability: 0.01 }
241
256
  #
242
- # 2. results { alternatives { transcript: "tube" } stability: 0.01 }
243
- # result_index: 0
257
+ # 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
244
258
  #
245
- # 3. results { alternatives { transcript: "to be a" } stability: 0.01 }
246
- # result_index: 0
247
- #
248
- # 4. results { alternatives { transcript: "to be" } stability: 0.9 }
259
+ # 3. results { alternatives { transcript: "to be" } stability: 0.9 }
249
260
  # results { alternatives { transcript: " or not to be" } stability: 0.01 }
250
- # result_index: 0
251
261
  #
252
- # 5. results { alternatives { transcript: "to be or not to be"
262
+ # 4. results { alternatives { transcript: "to be or not to be"
253
263
  # confidence: 0.92 }
254
264
  # alternatives { transcript: "to bee or not to bee" }
255
265
  # is_final: true }
256
- # result_index: 0
257
266
  #
258
- # 6. results { alternatives { transcript: " that's" } stability: 0.01 }
259
- # result_index: 1
267
+ # 5. results { alternatives { transcript: " that's" } stability: 0.01 }
260
268
  #
261
- # 7. results { alternatives { transcript: " that is" } stability: 0.9 }
269
+ # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
262
270
  # results { alternatives { transcript: " the question" } stability: 0.01 }
263
- # result_index: 1
264
271
  #
265
- # 8. endpointer_type: END_OF_SPEECH
272
+ # 7. speech_event_type: END_OF_SINGLE_UTTERANCE
266
273
  #
267
- # 9. results { alternatives { transcript: " that is the question"
274
+ # 8. results { alternatives { transcript: " that is the question"
268
275
  # confidence: 0.98 }
269
276
  # alternatives { transcript: " that was the question" }
270
277
  # is_final: true }
271
- # result_index: 1
272
- #
273
- # 10. endpointer_type: END_OF_AUDIO
274
278
  #
275
279
  # Notes:
276
280
  #
277
- # - Only two of the above responses #5 and #9 contain final results, they are
281
+ # - Only two of the above responses #4 and #8 contain final results; they are
278
282
  # indicated by +is_final: true+. Concatenating these together generates the
279
283
  # full transcript: "to be or not to be that is the question".
280
284
  #
281
- # - The others contain interim +results+. #4 and #7 contain two interim
282
- # +results+, the first portion has a high stability and is less likely to
283
- # change, the second portion has a low stability and is very likely to
285
+ # - The others contain interim +results+. #3 and #6 contain two interim
286
+ # +results+: the first portion has a high stability and is less likely to
287
+ # change; the second portion has a low stability and is very likely to
284
288
  # change. A UI designer might choose to show only high stability +results+.
285
289
  #
286
- # - The +result_index+ indicates the portion of audio that has had final
287
- # results returned, and is no longer being processed. For example, the
288
- # +results+ in #6 and later correspond to the portion of audio after
289
- # "to be or not to be".
290
+ # - The specific +stability+ and +confidence+ values shown above are only for
291
+ # illustrative purposes. Actual values may vary.
292
+ #
293
+ # - In each response, only one of these fields will be set:
294
+ # +error+,
295
+ # +speech_event_type+, or
296
+ # one or more (repeated) +results+.
290
297
  # @!attribute [rw] error
291
298
  # @return [Google::Rpc::Status]
292
- # [Output-only] If set, returns a Google::Rpc::Status message that
299
+ # *Output-only* If set, returns a Google::Rpc::Status message that
293
300
  # specifies the error for the operation.
294
301
  # @!attribute [rw] results
295
- # @return [Array<Google::Cloud::Speech::V1beta1::StreamingRecognitionResult>]
296
- # [Output-only] This repeated list contains zero or more results that
302
+ # @return [Array<Google::Cloud::Speech::V1::StreamingRecognitionResult>]
303
+ # *Output-only* This repeated list contains zero or more results that
297
304
  # correspond to consecutive portions of the audio currently being processed.
298
305
  # It contains zero or one +is_final=true+ result (the newly settled portion),
299
306
  # followed by zero or more +is_final=false+ results.
300
- # @!attribute [rw] result_index
301
- # @return [Integer]
302
- # [Output-only] Indicates the lowest index in the +results+ array that has
303
- # changed. The repeated +StreamingRecognitionResult+ results overwrite past
304
- # results at this index and higher.
305
- # @!attribute [rw] endpointer_type
306
- # @return [Google::Cloud::Speech::V1beta1::StreamingRecognizeResponse::EndpointerType]
307
- # [Output-only] Indicates the type of endpointer event.
307
+ # @!attribute [rw] speech_event_type
308
+ # @return [Google::Cloud::Speech::V1::StreamingRecognizeResponse::SpeechEventType]
309
+ # *Output-only* Indicates the type of speech event.
308
310
  class StreamingRecognizeResponse
309
- # Indicates the type of endpointer event.
310
- module EndpointerType
311
- # No endpointer event specified.
312
- ENDPOINTER_EVENT_UNSPECIFIED = 0
313
-
314
- # Speech has been detected in the audio stream.
315
- START_OF_SPEECH = 1
316
-
317
- # Speech has ceased to be detected in the audio stream.
318
- END_OF_SPEECH = 2
319
-
320
- # The end of the audio stream has been reached. and it is being processed.
321
- END_OF_AUDIO = 3
311
+ # Indicates the type of speech event.
312
+ module SpeechEventType
313
+ # No speech event specified.
314
+ SPEECH_EVENT_UNSPECIFIED = 0
322
315
 
323
- # This event is only sent when +single_utterance+ is +true+. It indicates
324
- # that the server has detected the end of the user's speech utterance and
325
- # expects no additional speech. Therefore, the server will not process
326
- # additional audio. The client should stop sending additional audio data.
327
- END_OF_UTTERANCE = 4
316
+ # This event indicates that the server has detected the end of the user's
317
+ # speech utterance and expects no additional speech. Therefore, the server
318
+ # will not process additional audio (although it may subsequently return
319
+ # additional results). The client should stop sending additional audio
320
+ # data, half-close the gRPC connection, and wait for any additional results
321
+ # until the server closes the gRPC connection. This event is only sent if
322
+ # +single_utterance+ was set to +true+, and is not used otherwise.
323
+ END_OF_SINGLE_UTTERANCE = 1
328
324
  end
329
325
  end
330
326
 
331
327
  # A streaming speech recognition result corresponding to a portion of the audio
332
328
  # that is currently being processed.
333
329
  # @!attribute [rw] alternatives
334
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionAlternative>]
335
- # [Output-only] May contain one or more recognition hypotheses (up to the
330
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
331
+ # *Output-only* May contain one or more recognition hypotheses (up to the
336
332
  # maximum specified in +max_alternatives+).
337
333
  # @!attribute [rw] is_final
338
334
  # @return [true, false]
339
- # [Output-only] If +false+, this +StreamingRecognitionResult+ represents an
335
+ # *Output-only* If +false+, this +StreamingRecognitionResult+ represents an
340
336
  # interim result that may change. If +true+, this is the final time the
341
337
  # speech service will return this particular +StreamingRecognitionResult+;
342
338
  # the recognizer will not return any further hypotheses for this portion of
343
339
  # the transcript and corresponding audio.
344
340
  # @!attribute [rw] stability
345
341
  # @return [Float]
346
- # [Output-only] An estimate of the probability that the recognizer will not
342
+ # *Output-only* An estimate of the likelihood that the recognizer will not
347
343
  # change its guess about this interim result. Values range from 0.0
348
- # (completely unstable) to 1.0 (completely stable). Note that this is not the
349
- # same as +confidence+, which estimates the probability that a recognition
350
- # result is correct.
344
+ # (completely unstable) to 1.0 (completely stable).
351
345
  # This field is only provided for interim results (+is_final=false+).
352
- # The default of 0.0 is a sentinel value indicating stability was not set.
346
+ # The default of 0.0 is a sentinel value indicating +stability+ was not set.
353
347
  class StreamingRecognitionResult; end
354
348
 
355
349
  # A speech recognition result corresponding to a portion of the audio.
356
350
  # @!attribute [rw] alternatives
357
- # @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionAlternative>]
358
- # [Output-only] May contain one or more recognition hypotheses (up to the
351
+ # @return [Array<Google::Cloud::Speech::V1::SpeechRecognitionAlternative>]
352
+ # *Output-only* May contain one or more recognition hypotheses (up to the
359
353
  # maximum specified in +max_alternatives+).
360
354
  class SpeechRecognitionResult; end
361
355
 
362
356
  # Alternative hypotheses (a.k.a. n-best list).
363
357
  # @!attribute [rw] transcript
364
358
  # @return [String]
365
- # [Output-only] Transcript text representing the words that the user spoke.
359
+ # *Output-only* Transcript text representing the words that the user spoke.
366
360
  # @!attribute [rw] confidence
367
361
  # @return [Float]
368
- # [Output-only] The confidence estimate between 0.0 and 1.0. A higher number
369
- # means the system is more confident that the recognition is correct.
370
- # This field is typically provided only for the top hypothesis, and only for
371
- # +is_final=true+ results.
372
- # The default of 0.0 is a sentinel value indicating confidence was not set.
362
+ # *Output-only* The confidence estimate between 0.0 and 1.0. A higher number
363
+ # indicates an estimated greater likelihood that the recognized words are
364
+ # correct. This field is typically provided only for the top hypothesis, and
365
+ # only for +is_final=true+ results. Clients should not rely on the
366
+ # +confidence+ field as it is not guaranteed to be accurate, or even set, in
367
+ # any of the results.
368
+ # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
373
369
  class SpeechRecognitionAlternative; end
374
370
  end
375
371
  end