google-cloud-speech 0.29.0 → 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -1
  3. data/LICENSE +1 -1
  4. data/README.md +69 -43
  5. data/lib/google/cloud/speech.rb +94 -252
  6. data/lib/google/cloud/speech/v1.rb +11 -1
  7. data/lib/google/cloud/speech/v1/cloud_speech_services_pb.rb +1 -1
  8. data/lib/google/cloud/speech/{version.rb → v1/credentials.rb} +12 -2
  9. data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +2 -2
  10. data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +92 -0
  11. data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +1 -1
  12. data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +1 -1
  13. data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +1 -1
  14. data/lib/google/cloud/speech/v1/doc/overview.rb +1 -1
  15. data/lib/google/cloud/speech/v1/helpers.rb +93 -0
  16. data/lib/google/cloud/speech/v1/speech_client.rb +26 -49
  17. data/lib/google/cloud/speech/v1/speech_client_config.json +5 -5
  18. data/lib/google/cloud/speech/v1/stream.rb +614 -0
  19. data/lib/google/cloud/speech/v1p1beta1.rb +126 -0
  20. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_pb.rb +175 -0
  21. data/lib/google/cloud/speech/v1p1beta1/cloud_speech_services_pb.rb +54 -0
  22. data/lib/google/cloud/speech/v1p1beta1/credentials.rb +32 -0
  23. data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +625 -0
  24. data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +92 -0
  25. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +124 -0
  26. data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +90 -0
  27. data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +83 -0
  28. data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +73 -0
  29. data/lib/google/cloud/speech/v1p1beta1/helpers.rb +93 -0
  30. data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +322 -0
  31. data/lib/google/cloud/speech/v1p1beta1/speech_client_config.json +41 -0
  32. data/lib/google/cloud/speech/v1p1beta1/stream.rb +614 -0
  33. metadata +29 -120
  34. data/lib/google-cloud-speech.rb +0 -142
  35. data/lib/google/cloud/speech/audio.rb +0 -330
  36. data/lib/google/cloud/speech/convert.rb +0 -46
  37. data/lib/google/cloud/speech/credentials.rb +0 -57
  38. data/lib/google/cloud/speech/operation.rb +0 -262
  39. data/lib/google/cloud/speech/project.rb +0 -651
  40. data/lib/google/cloud/speech/result.rb +0 -240
  41. data/lib/google/cloud/speech/service.rb +0 -121
  42. data/lib/google/cloud/speech/stream.rb +0 -564
@@ -0,0 +1,126 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ require "google/cloud/speech/v1p1beta1/speech_client"
16
+ require "google/cloud/speech/v1p1beta1/helpers"
17
+
18
+ module Google
19
+ module Cloud
20
+ # rubocop:disable LineLength
21
+
22
+ ##
23
+ # # Ruby Client for Cloud Speech API ([Alpha](https://github.com/GoogleCloudPlatform/google-cloud-ruby#versioning))
24
+ #
25
+ # [Cloud Speech API][Product Documentation]:
26
+ # Converts audio to text by applying powerful neural network models.
27
+ # - [Product Documentation][]
28
+ #
29
+ # ## Quick Start
30
+ # In order to use this library, you first need to go through the following
31
+ # steps:
32
+ #
33
+ # 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
34
+ # 2. [Enable billing for your project.](https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project)
35
+ # 3. [Enable the Cloud Speech API.](https://console.cloud.google.com/apis/api/speech)
36
+ # 4. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
37
+ #
38
+ # ### Preview
39
+ # #### SpeechClient
40
+ # ```rb
41
+ # require "google/cloud/speech/v1p1beta1"
42
+ #
43
+ # speech_client = Google::Cloud::Speech::V1p1beta1.new
44
+ # language_code = "en-US"
45
+ # sample_rate_hertz = 44100
46
+ # encoding = :FLAC
47
+ # config = {
48
+ # language_code: language_code,
49
+ # sample_rate_hertz: sample_rate_hertz,
50
+ # encoding: encoding
51
+ # }
52
+ # uri = "gs://gapic-toolkit/hello.flac"
53
+ # audio = { uri: uri }
54
+ # response = speech_client.recognize(config, audio)
55
+ # ```
56
+ #
57
+ # ### Next Steps
58
+ # - Read the [Cloud Speech API Product documentation][Product Documentation]
59
+ # to learn more about the product and see How-to Guides.
60
+ # - View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/README.md)
61
+ # to see the full list of Cloud APIs that we cover.
62
+ #
63
+ # [Product Documentation]: https://cloud.google.com/speech
64
+ #
65
+ #
66
+ module Speech
67
+ module V1p1beta1
68
+ # rubocop:enable LineLength
69
+
70
+ ##
71
+ # Service that implements Google Cloud Speech API.
72
+ #
73
+ # @param credentials [Google::Auth::Credentials, String, Hash, GRPC::Core::Channel, GRPC::Core::ChannelCredentials, Proc]
74
+ # Provides the means for authenticating requests made by the client. This parameter can
75
+ # be many types.
76
+ # A `Google::Auth::Credentials` uses the properties of its represented keyfile for
77
+ # authenticating requests made by this client.
78
+ # A `String` will be treated as the path to the keyfile to be used for the construction of
79
+ # credentials for this client.
80
+ # A `Hash` will be treated as the contents of a keyfile to be used for the construction of
81
+ # credentials for this client.
82
+ # A `GRPC::Core::Channel` will be used to make calls through.
83
+ # A `GRPC::Core::ChannelCredentials` for setting up the RPC client. The channel credentials
84
+ # should already be composed with a `GRPC::Core::CallCredentials` object.
85
+ # A `Proc` will be used as an updater_proc for the Grpc channel. The proc transforms the
86
+ # metadata for requests, generally, to give OAuth credentials.
87
+ # @param scopes [Array<String>]
88
+ # The OAuth scopes for this service. This parameter is ignored if
89
+ # an updater_proc is supplied.
90
+ # @param client_config [Hash]
91
+ # A Hash for call options for each method. See
92
+ # Google::Gax#construct_settings for the structure of
93
+ # this data. Falls back to the default config if not specified
94
+ # or the specified config is missing data points.
95
+ # @param timeout [Numeric]
96
+ # The default timeout, in seconds, for calls made through this client.
97
+ # @param metadata [Hash]
98
+ # Default metadata to be sent with each request. This can be overridden on a per call basis.
99
+ # @param exception_transformer [Proc]
100
+ # An optional proc that intercepts any exceptions raised during an API call to inject
101
+ # custom error handling.
102
+ def self.new \
103
+ credentials: nil,
104
+ scopes: nil,
105
+ client_config: nil,
106
+ timeout: nil,
107
+ metadata: nil,
108
+ exception_transformer: nil,
109
+ lib_name: nil,
110
+ lib_version: nil
111
+ kwargs = {
112
+ credentials: credentials,
113
+ scopes: scopes,
114
+ client_config: client_config,
115
+ timeout: timeout,
116
+ metadata: metadata,
117
+ exception_transformer: exception_transformer,
118
+ lib_name: lib_name,
119
+ lib_version: lib_version
120
+ }.select { |_, v| v != nil }
121
+ Google::Cloud::Speech::V1p1beta1::SpeechClient.new(**kwargs)
122
+ end
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,175 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # source: google/cloud/speech/v1p1beta1/cloud_speech.proto
3
+
4
+ require 'google/protobuf'
5
+
6
+ require 'google/api/annotations_pb'
7
+ require 'google/longrunning/operations_pb'
8
+ require 'google/protobuf/any_pb'
9
+ require 'google/protobuf/duration_pb'
10
+ require 'google/protobuf/timestamp_pb'
11
+ require 'google/rpc/status_pb'
12
+ Google::Protobuf::DescriptorPool.generated_pool.build do
13
+ add_message "google.cloud.speech.v1p1beta1.RecognizeRequest" do
14
+ optional :config, :message, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig"
15
+ optional :audio, :message, 2, "google.cloud.speech.v1p1beta1.RecognitionAudio"
16
+ end
17
+ add_message "google.cloud.speech.v1p1beta1.LongRunningRecognizeRequest" do
18
+ optional :config, :message, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig"
19
+ optional :audio, :message, 2, "google.cloud.speech.v1p1beta1.RecognitionAudio"
20
+ end
21
+ add_message "google.cloud.speech.v1p1beta1.StreamingRecognizeRequest" do
22
+ oneof :streaming_request do
23
+ optional :streaming_config, :message, 1, "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig"
24
+ optional :audio_content, :bytes, 2
25
+ end
26
+ end
27
+ add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionConfig" do
28
+ optional :config, :message, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig"
29
+ optional :single_utterance, :bool, 2
30
+ optional :interim_results, :bool, 3
31
+ end
32
+ add_message "google.cloud.speech.v1p1beta1.RecognitionConfig" do
33
+ optional :encoding, :enum, 1, "google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding"
34
+ optional :sample_rate_hertz, :int32, 2
35
+ optional :language_code, :string, 3
36
+ optional :max_alternatives, :int32, 4
37
+ optional :profanity_filter, :bool, 5
38
+ repeated :speech_contexts, :message, 6, "google.cloud.speech.v1p1beta1.SpeechContext"
39
+ optional :enable_word_time_offsets, :bool, 8
40
+ optional :enable_automatic_punctuation, :bool, 11
41
+ optional :metadata, :message, 9, "google.cloud.speech.v1p1beta1.RecognitionMetadata"
42
+ optional :model, :string, 13
43
+ optional :use_enhanced, :bool, 14
44
+ end
45
+ add_enum "google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding" do
46
+ value :ENCODING_UNSPECIFIED, 0
47
+ value :LINEAR16, 1
48
+ value :FLAC, 2
49
+ value :MULAW, 3
50
+ value :AMR, 4
51
+ value :AMR_WB, 5
52
+ value :OGG_OPUS, 6
53
+ value :SPEEX_WITH_HEADER_BYTE, 7
54
+ end
55
+ add_message "google.cloud.speech.v1p1beta1.RecognitionMetadata" do
56
+ optional :interaction_type, :enum, 1, "google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType"
57
+ optional :industry_naics_code_of_audio, :uint32, 3
58
+ optional :microphone_distance, :enum, 4, "google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance"
59
+ optional :original_media_type, :enum, 5, "google.cloud.speech.v1p1beta1.RecognitionMetadata.OriginalMediaType"
60
+ optional :recording_device_type, :enum, 6, "google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType"
61
+ optional :recording_device_name, :string, 7
62
+ optional :original_mime_type, :string, 8
63
+ optional :obfuscated_id, :int64, 9
64
+ optional :audio_topic, :string, 10
65
+ end
66
+ add_enum "google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType" do
67
+ value :INTERACTION_TYPE_UNSPECIFIED, 0
68
+ value :DISCUSSION, 1
69
+ value :PRESENTATION, 2
70
+ value :PHONE_CALL, 3
71
+ value :VOICEMAIL, 4
72
+ value :PROFESSIONALLY_PRODUCED, 5
73
+ value :VOICE_SEARCH, 6
74
+ value :VOICE_COMMAND, 7
75
+ value :DICTATION, 8
76
+ end
77
+ add_enum "google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance" do
78
+ value :MICROPHONE_DISTANCE_UNSPECIFIED, 0
79
+ value :NEARFIELD, 1
80
+ value :MIDFIELD, 2
81
+ value :FARFIELD, 3
82
+ end
83
+ add_enum "google.cloud.speech.v1p1beta1.RecognitionMetadata.OriginalMediaType" do
84
+ value :ORIGINAL_MEDIA_TYPE_UNSPECIFIED, 0
85
+ value :AUDIO, 1
86
+ value :VIDEO, 2
87
+ end
88
+ add_enum "google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType" do
89
+ value :RECORDING_DEVICE_TYPE_UNSPECIFIED, 0
90
+ value :SMARTPHONE, 1
91
+ value :PC, 2
92
+ value :PHONE_LINE, 3
93
+ value :VEHICLE, 4
94
+ value :OTHER_OUTDOOR_DEVICE, 5
95
+ value :OTHER_INDOOR_DEVICE, 6
96
+ end
97
+ add_message "google.cloud.speech.v1p1beta1.SpeechContext" do
98
+ repeated :phrases, :string, 1
99
+ end
100
+ add_message "google.cloud.speech.v1p1beta1.RecognitionAudio" do
101
+ oneof :audio_source do
102
+ optional :content, :bytes, 1
103
+ optional :uri, :string, 2
104
+ end
105
+ end
106
+ add_message "google.cloud.speech.v1p1beta1.RecognizeResponse" do
107
+ repeated :results, :message, 2, "google.cloud.speech.v1p1beta1.SpeechRecognitionResult"
108
+ end
109
+ add_message "google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse" do
110
+ repeated :results, :message, 2, "google.cloud.speech.v1p1beta1.SpeechRecognitionResult"
111
+ end
112
+ add_message "google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata" do
113
+ optional :progress_percent, :int32, 1
114
+ optional :start_time, :message, 2, "google.protobuf.Timestamp"
115
+ optional :last_update_time, :message, 3, "google.protobuf.Timestamp"
116
+ end
117
+ add_message "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse" do
118
+ optional :error, :message, 1, "google.rpc.Status"
119
+ repeated :results, :message, 2, "google.cloud.speech.v1p1beta1.StreamingRecognitionResult"
120
+ optional :speech_event_type, :enum, 4, "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType"
121
+ end
122
+ add_enum "google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType" do
123
+ value :SPEECH_EVENT_UNSPECIFIED, 0
124
+ value :END_OF_SINGLE_UTTERANCE, 1
125
+ end
126
+ add_message "google.cloud.speech.v1p1beta1.StreamingRecognitionResult" do
127
+ repeated :alternatives, :message, 1, "google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative"
128
+ optional :is_final, :bool, 2
129
+ optional :stability, :float, 3
130
+ end
131
+ add_message "google.cloud.speech.v1p1beta1.SpeechRecognitionResult" do
132
+ repeated :alternatives, :message, 1, "google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative"
133
+ end
134
+ add_message "google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative" do
135
+ optional :transcript, :string, 1
136
+ optional :confidence, :float, 2
137
+ repeated :words, :message, 3, "google.cloud.speech.v1p1beta1.WordInfo"
138
+ end
139
+ add_message "google.cloud.speech.v1p1beta1.WordInfo" do
140
+ optional :start_time, :message, 1, "google.protobuf.Duration"
141
+ optional :end_time, :message, 2, "google.protobuf.Duration"
142
+ optional :word, :string, 3
143
+ end
144
+ end
145
+
146
+ module Google
147
+ module Cloud
148
+ module Speech
149
+ module V1p1beta1
150
+ RecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognizeRequest").msgclass
151
+ LongRunningRecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.LongRunningRecognizeRequest").msgclass
152
+ StreamingRecognizeRequest = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognizeRequest").msgclass
153
+ StreamingRecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionConfig").msgclass
154
+ RecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig").msgclass
155
+ RecognitionConfig::AudioEncoding = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding").enummodule
156
+ RecognitionMetadata = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionMetadata").msgclass
157
+ RecognitionMetadata::InteractionType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType").enummodule
158
+ RecognitionMetadata::MicrophoneDistance = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance").enummodule
159
+ RecognitionMetadata::OriginalMediaType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionMetadata.OriginalMediaType").enummodule
160
+ RecognitionMetadata::RecordingDeviceType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType").enummodule
161
+ SpeechContext = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.SpeechContext").msgclass
162
+ RecognitionAudio = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognitionAudio").msgclass
163
+ RecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.RecognizeResponse").msgclass
164
+ LongRunningRecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse").msgclass
165
+ LongRunningRecognizeMetadata = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata").msgclass
166
+ StreamingRecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognizeResponse").msgclass
167
+ StreamingRecognizeResponse::SpeechEventType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognizeResponse.SpeechEventType").enummodule
168
+ StreamingRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.StreamingRecognitionResult").msgclass
169
+ SpeechRecognitionResult = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.SpeechRecognitionResult").msgclass
170
+ SpeechRecognitionAlternative = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative").msgclass
171
+ WordInfo = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1p1beta1.WordInfo").msgclass
172
+ end
173
+ end
174
+ end
175
+ end
@@ -0,0 +1,54 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # Source: google/cloud/speech/v1p1beta1/cloud_speech.proto for package 'google.cloud.speech.v1p1beta1'
3
+ # Original file comments:
4
+ # Copyright 2018 Google Inc.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+
19
+ require 'grpc'
20
+ require 'google/cloud/speech/v1p1beta1/cloud_speech_pb'
21
+
22
+ module Google
23
+ module Cloud
24
+ module Speech
25
+ module V1p1beta1
26
+ module Speech
27
+ # Service that implements Google Cloud Speech API.
28
+ class Service
29
+
30
+ include GRPC::GenericService
31
+
32
+ self.marshal_class_method = :encode
33
+ self.unmarshal_class_method = :decode
34
+ self.service_name = 'google.cloud.speech.v1p1beta1.Speech'
35
+
36
+ # Performs synchronous speech recognition: receive results after all audio
37
+ # has been sent and processed.
38
+ rpc :Recognize, RecognizeRequest, RecognizeResponse
39
+ # Performs asynchronous speech recognition: receive results via the
40
+ # google.longrunning.Operations interface. Returns either an
41
+ # `Operation.error` or an `Operation.response` which contains
42
+ # a `LongRunningRecognizeResponse` message.
43
+ rpc :LongRunningRecognize, LongRunningRecognizeRequest, Google::Longrunning::Operation
44
+ # Performs bidirectional streaming speech recognition: receive results while
45
+ # sending audio. This method is only available via the gRPC API (not REST).
46
+ rpc :StreamingRecognize, stream(StreamingRecognizeRequest), stream(StreamingRecognizeResponse)
47
+ end
48
+
49
+ Stub = Service.rpc_stub_class
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,32 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ require "googleauth"
16
+
17
+ module Google
18
+ module Cloud
19
+ module Speech
20
+ module V1p1beta1
21
+ class Credentials < Google::Auth::Credentials
22
+ SCOPE = [
23
+ "https://www.googleapis.com/auth/cloud-platform"
24
+ ].freeze
25
+ PATH_ENV_VARS = %w(SPEECH_KEYFILE GOOGLE_CLOUD_KEYFILE GCLOUD_KEYFILE)
26
+ JSON_ENV_VARS = %w(SPEECH_KEYFILE_JSON GOOGLE_CLOUD_KEYFILE_JSON GCLOUD_KEYFILE_JSON)
27
+ DEFAULT_PATHS = ["~/.config/gcloud/application_default_credentials.json"]
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,625 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Google
16
+ module Cloud
17
+ module Speech
18
+ ##
19
+ # # Cloud Speech API Contents
20
+ #
21
+ # | Class | Description |
22
+ # | ----- | ----------- |
23
+ # | [SpeechClient][] | Service that implements Google Cloud Speech API. |
24
+ # | [Data Types][] | Data types for Google::Cloud::Speech::V1p1beta1 |
25
+ #
26
+ # [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1p1beta1/speechclient
27
+ # [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1p1beta1/datatypes
28
+ #
29
+ module V1p1beta1
30
+ # The top-level message sent by the client for the +Recognize+ method.
31
+ # @!attribute [rw] config
32
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig]
33
+ # *Required* Provides information to the recognizer that specifies how to
34
+ # process the request.
35
+ # @!attribute [rw] audio
36
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionAudio]
37
+ # *Required* The audio data to be recognized.
38
+ class RecognizeRequest; end
39
+
40
+ # The top-level message sent by the client for the +LongRunningRecognize+
41
+ # method.
42
+ # @!attribute [rw] config
43
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig]
44
+ # *Required* Provides information to the recognizer that specifies how to
45
+ # process the request.
46
+ # @!attribute [rw] audio
47
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionAudio]
48
+ # *Required* The audio data to be recognized.
49
+ class LongRunningRecognizeRequest; end
50
+
51
+ # The top-level message sent by the client for the +StreamingRecognize+ method.
52
+ # Multiple +StreamingRecognizeRequest+ messages are sent. The first message
53
+ # must contain a +streaming_config+ message and must not contain +audio+ data.
54
+ # All subsequent messages must contain +audio+ data and must not contain a
55
+ # +streaming_config+ message.
56
+ # @!attribute [rw] streaming_config
57
+ # @return [Google::Cloud::Speech::V1p1beta1::StreamingRecognitionConfig]
58
+ # Provides information to the recognizer that specifies how to process the
59
+ # request. The first +StreamingRecognizeRequest+ message must contain a
60
+ # +streaming_config+ message.
61
+ # @!attribute [rw] audio_content
62
+ # @return [String]
63
+ # The audio data to be recognized. Sequential chunks of audio data are sent
64
+ # in sequential +StreamingRecognizeRequest+ messages. The first
65
+ # +StreamingRecognizeRequest+ message must not contain +audio_content+ data
66
+ # and all subsequent +StreamingRecognizeRequest+ messages must contain
67
+ # +audio_content+ data. The audio bytes must be encoded as specified in
68
+ # +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
69
+ # pure binary representation (not base64). See
70
+ # [audio limits](https://cloud.google.com/speech/limits#content).
71
+ class StreamingRecognizeRequest; end
72
+
73
+ # Provides information to the recognizer that specifies how to process the
74
+ # request.
75
+ # @!attribute [rw] config
76
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig]
77
+ # *Required* Provides information to the recognizer that specifies how to
78
+ # process the request.
79
+ # @!attribute [rw] single_utterance
80
+ # @return [true, false]
81
+ # *Optional* If +false+ or omitted, the recognizer will perform continuous
82
+ # recognition (continuing to wait for and process audio even if the user
83
+ # pauses speaking) until the client closes the input stream (gRPC API) or
84
+ # until the maximum time limit has been reached. May return multiple
85
+ # +StreamingRecognitionResult+s with the +is_final+ flag set to +true+.
86
+ #
87
+ # If +true+, the recognizer will detect a single spoken utterance. When it
88
+ # detects that the user has paused or stopped speaking, it will return an
89
+ # +END_OF_SINGLE_UTTERANCE+ event and cease recognition. It will return no
90
+ # more than one +StreamingRecognitionResult+ with the +is_final+ flag set to
91
+ # +true+.
92
+ # @!attribute [rw] interim_results
93
+ # @return [true, false]
94
+ # *Optional* If +true+, interim results (tentative hypotheses) may be
95
+ # returned as they become available (these interim results are indicated with
96
+ # the +is_final=false+ flag).
97
+ # If +false+ or omitted, only +is_final=true+ result(s) are returned.
98
+ class StreamingRecognitionConfig; end
99
+
100
+ # Provides information to the recognizer that specifies how to process the
101
+ # request.
102
+ # @!attribute [rw] encoding
103
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding]
104
+ # Encoding of audio data sent in all +RecognitionAudio+ messages.
105
+ # This field is optional for +FLAC+ and +WAV+ audio files and required
106
+ # for all other audio formats. For details, see {Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
107
+ # @!attribute [rw] sample_rate_hertz
108
+ # @return [Integer]
109
+ # Sample rate in Hertz of the audio data sent in all
110
+ # +RecognitionAudio+ messages. Valid values are: 8000-48000.
111
+ # 16000 is optimal. For best results, set the sampling rate of the audio
112
+ # source to 16000 Hz. If that's not possible, use the native sample rate of
113
+ # the audio source (instead of re-sampling).
114
+ # This field is optional for +FLAC+ and +WAV+ audio files and required
115
+ # for all other audio formats. For details, see {Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
116
+ # @!attribute [rw] language_code
117
+ # @return [String]
118
+ # *Required* The language of the supplied audio as a
119
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
120
+ # Example: "en-US".
121
+ # See [Language Support](https://cloud.google.com/speech/docs/languages)
122
+ # for a list of the currently supported language codes.
123
+ # @!attribute [rw] max_alternatives
124
+ # @return [Integer]
125
+ # *Optional* Maximum number of recognition hypotheses to be returned.
126
+ # Specifically, the maximum number of +SpeechRecognitionAlternative+ messages
127
+ # within each +SpeechRecognitionResult+.
128
+ # The server may return fewer than +max_alternatives+.
129
+ # Valid values are +0+-+30+. A value of +0+ or +1+ will return a maximum of
130
+ # one. If omitted, will return a maximum of one.
131
+ # @!attribute [rw] profanity_filter
132
+ # @return [true, false]
133
+ # *Optional* If set to +true+, the server will attempt to filter out
134
+ # profanities, replacing all but the initial character in each filtered word
135
+ # with asterisks, e.g. "f***". If set to +false+ or omitted, profanities
136
+ # won't be filtered out.
137
+ # @!attribute [rw] speech_contexts
138
+ # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechContext>]
139
+ # *Optional* A means to provide context to assist the speech recognition.
140
+ # @!attribute [rw] enable_word_time_offsets
141
+ # @return [true, false]
142
+ # *Optional* If +true+, the top result includes a list of words and
143
+ # the start and end time offsets (timestamps) for those words. If
144
+ # +false+, no word-level time offset information is returned. The default is
145
+ # +false+.
146
+ # @!attribute [rw] enable_automatic_punctuation
147
+ # @return [true, false]
148
+ # *Optional* If 'true', adds punctuation to recognition result hypotheses.
149
+ # This feature is only available in select languages. Setting this for
150
+ # requests in other languages has no effect at all.
151
+ # The default 'false' value does not add punctuation to result hypotheses.
152
+ # NOTE: "This is currently offered as an experimental service, complimentary
153
+ # to all users. In the future this may be exclusively available as a
154
+ # premium feature."
155
+ # @!attribute [rw] metadata
156
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionMetadata]
157
+ # *Optional* Metadata regarding this request.
158
+ # @!attribute [rw] model
159
+ # @return [String]
160
+ # *Optional* Which model to select for the given request. Select the model
161
+ # best suited to your domain to get best results. If a model is not
162
+ # explicitly specified, then we auto-select a model based on the parameters
163
+ # in the RecognitionConfig.
164
+ # <table>
165
+ # <tr>
166
+ # <td><b>Model</b></td>
167
+ # <td><b>Description</b></td>
168
+ # </tr>
169
+ # <tr>
170
+ # <td><code>command_and_search</code></td>
171
+ # <td>Best for short queries such as voice commands or voice search.</td>
172
+ # </tr>
173
+ # <tr>
174
+ # <td><code>phone_call</code></td>
175
+ # <td>Best for audio that originated from a phone call (typically
176
+ # recorded at an 8khz sampling rate).</td>
177
+ # </tr>
178
+ # <tr>
179
+ # <td><code>video</code></td>
180
+ # <td>Best for audio that originated from from video or includes multiple
181
+ # speakers. Ideally the audio is recorded at a 16khz or greater
182
+ # sampling rate. This is a premium model that costs more than the
183
+ # standard rate.</td>
184
+ # </tr>
185
+ # <tr>
186
+ # <td><code>default</code></td>
187
+ # <td>Best for audio that is not one of the specific audio models.
188
+ # For example, long-form audio. Ideally the audio is high-fidelity,
189
+ # recorded at a 16khz or greater sampling rate.</td>
190
+ # </tr>
191
+ # </table>
192
+ # @!attribute [rw] use_enhanced
193
+ # @return [true, false]
194
+ # *Optional* Set to true to use an enhanced model for speech recognition.
195
+ # You must also set the +model+ field to a valid, enhanced model. If
196
+ # +use_enhanced+ is set to true and the +model+ field is not set, then
197
+ # +use_enhanced+ is ignored. If +use_enhanced+ is true and an enhanced
198
+ # version of the specified model does not exist, then the speech is
199
+ # recognized using the standard version of the specified model.
200
+ #
201
+ # Enhanced speech models require that you opt-in to the audio logging using
202
+ # instructions in the [alpha documentation](https://cloud.google.com/speech/data-sharing). If you set
203
+ # +use_enhanced+ to true and you have not enabled audio logging, then you
204
+ # will receive an error.
205
+ class RecognitionConfig
206
+ # The encoding of the audio data sent in the request.
207
+ #
208
+ # All encodings support only 1 channel (mono) audio.
209
+ #
210
+ # For best results, the audio source should be captured and transmitted using
211
+ # a lossless encoding (+FLAC+ or +LINEAR16+). The accuracy of the speech
212
+ # recognition can be reduced if lossy codecs are used to capture or transmit
213
+ # audio, particularly if background noise is present. Lossy codecs include
214
+ # +MULAW+, +AMR+, +AMR_WB+, +OGG_OPUS+, and +SPEEX_WITH_HEADER_BYTE+.
215
+ #
216
+ # The +FLAC+ and +WAV+ audio file formats include a header that describes the
217
+ # included audio content. You can request recognition for +WAV+ files that
218
+ # contain either +LINEAR16+ or +MULAW+ encoded audio.
219
+ # If you send +FLAC+ or +WAV+ audio file format in
220
+ # your request, you do not need to specify an +AudioEncoding+; the audio
221
+ # encoding format is determined from the file header. If you specify
222
+ # an +AudioEncoding+ when you send +FLAC+ or +WAV+ audio, the
223
+ # encoding configuration must match the encoding described in the audio
224
+ # header; otherwise the request returns an
225
+ # {Google::Rpc::Code::INVALID_ARGUMENT} error code.
226
+ module AudioEncoding
227
+ # Not specified.
228
+ ENCODING_UNSPECIFIED = 0
229
+
230
+ # Uncompressed 16-bit signed little-endian samples (Linear PCM).
231
+ LINEAR16 = 1
232
+
233
+ # +FLAC+ (Free Lossless Audio
234
+ # Codec) is the recommended encoding because it is
235
+ # lossless--therefore recognition is not compromised--and
236
+ # requires only about half the bandwidth of +LINEAR16+. +FLAC+ stream
237
+ # encoding supports 16-bit and 24-bit samples, however, not all fields in
238
+ # +STREAMINFO+ are supported.
239
+ FLAC = 2
240
+
241
+ # 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
242
+ MULAW = 3
243
+
244
+ # Adaptive Multi-Rate Narrowband codec. +sample_rate_hertz+ must be 8000.
245
+ AMR = 4
246
+
247
+ # Adaptive Multi-Rate Wideband codec. +sample_rate_hertz+ must be 16000.
248
+ AMR_WB = 5
249
+
250
+ # Opus encoded audio frames in Ogg container
251
+ # ([OggOpus](https://wiki.xiph.org/OggOpus)).
252
+ # +sample_rate_hertz+ must be one of 8000, 12000, 16000, 24000, or 48000.
253
+ OGG_OPUS = 6
254
+
255
+ # Although the use of lossy encodings is not recommended, if a very low
256
+ # bitrate encoding is required, +OGG_OPUS+ is highly preferred over
257
+ # Speex encoding. The [Speex](https://speex.org/) encoding supported by
258
+ # Cloud Speech API has a header byte in each block, as in MIME type
259
+ # +audio/x-speex-with-header-byte+.
260
+ # It is a variant of the RTP Speex encoding defined in
261
+ # [RFC 5574](https://tools.ietf.org/html/rfc5574).
262
+ # The stream is a sequence of blocks, one block per RTP packet. Each block
263
+ # starts with a byte containing the length of the block, in bytes, followed
264
+ # by one or more frames of Speex data, padded to an integral number of
265
+ # bytes (octets) as specified in RFC 5574. In other words, each RTP header
266
+ # is replaced with a single byte containing the block length. Only Speex
267
+ # wideband is supported. +sample_rate_hertz+ must be 16000.
268
+ SPEEX_WITH_HEADER_BYTE = 7
269
+ end
270
+ end
271
+
272
+ # Description of audio data to be recognized.
273
+ # @!attribute [rw] interaction_type
274
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionMetadata::InteractionType]
275
+ # The use case most closely describing the audio content to be recognized.
276
+ # @!attribute [rw] industry_naics_code_of_audio
277
+ # @return [Integer]
278
+ # The industry vertical to which this speech recognition request most
279
+ # closely applies. This is most indicative of the topics contained
280
+ # in the audio. Use the 6-digit NAICS code to identify the industry
281
+ # vertical - see https://www.naics.com/search/.
282
+ # @!attribute [rw] microphone_distance
283
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionMetadata::MicrophoneDistance]
284
+ # The audio type that most closely describes the audio being recognized.
285
+ # @!attribute [rw] original_media_type
286
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionMetadata::OriginalMediaType]
287
+ # The original media the speech was recorded on.
288
+ # @!attribute [rw] recording_device_type
289
+ # @return [Google::Cloud::Speech::V1p1beta1::RecognitionMetadata::RecordingDeviceType]
290
+ # The type of device the speech was recorded with.
291
+ # @!attribute [rw] recording_device_name
292
+ # @return [String]
293
+ # The device used to make the recording. Examples 'Nexus 5X' or
294
+ # 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or
295
+ # 'Cardioid Microphone'.
296
+ # @!attribute [rw] original_mime_type
297
+ # @return [String]
298
+ # Mime type of the original audio file. For example +audio/m4a+,
299
+ # +audio/x-alaw-basic+, +audio/mp3+, +audio/3gpp+.
300
+ # A list of possible audio mime types is maintained at
301
+ # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
302
+ # @!attribute [rw] obfuscated_id
303
+ # @return [Integer]
304
+ # Obfuscated (privacy-protected) ID of the user, to identify number of
305
+ # unique users using the service.
306
+ # @!attribute [rw] audio_topic
307
+ # @return [String]
308
+ # Description of the content. Eg. "Recordings of federal supreme court
309
+ # hearings from 2012".
310
+ class RecognitionMetadata
311
+ # Use case categories that the audio recognition request can be described
312
+ # by.
313
+ module InteractionType
314
+ # Use case is either unknown or is something other than one of the other
315
+ # values below.
316
+ INTERACTION_TYPE_UNSPECIFIED = 0
317
+
318
+ # Multiple people in a conversation or discussion. For example in a
319
+ # meeting with two or more people actively participating. Typically
320
+ # all the primary people speaking would be in the same room (if not,
321
+ # see PHONE_CALL)
322
+ DISCUSSION = 1
323
+
324
+ # One or more persons lecturing or presenting to others, mostly
325
+ # uninterrupted.
326
+ PRESENTATION = 2
327
+
328
+ # A phone-call or video-conference in which two or more people, who are
329
+ # not in the same room, are actively participating.
330
+ PHONE_CALL = 3
331
+
332
+ # A recorded message intended for another person to listen to.
333
+ VOICEMAIL = 4
334
+
335
+ # Professionally produced audio (eg. TV Show, Podcast).
336
+ PROFESSIONALLY_PRODUCED = 5
337
+
338
+ # Transcribe spoken questions and queries into text.
339
+ VOICE_SEARCH = 6
340
+
341
+ # Transcribe voice commands, such as for controlling a device.
342
+ VOICE_COMMAND = 7
343
+
344
+ # Transcribe speech to text to create a written document, such as a
345
+ # text-message, email or report.
346
+ DICTATION = 8
347
+ end
348
+
349
+ # Enumerates the types of capture settings describing an audio file.
350
+ module MicrophoneDistance
351
+ # Audio type is not known.
352
+ MICROPHONE_DISTANCE_UNSPECIFIED = 0
353
+
354
+ # The audio was captured from a closely placed microphone. Eg. phone,
355
+ # dictaphone, or handheld microphone. Generally if the speaker is within
356
+ # 1 meter of the microphone.
357
+ NEARFIELD = 1
358
+
359
+ # The speaker is within 3 meters of the microphone.
360
+ MIDFIELD = 2
361
+
362
+ # The speaker is more than 3 meters away from the microphone.
363
+ FARFIELD = 3
364
+ end
365
+
366
+ # The original media the speech was recorded on.
367
+ module OriginalMediaType
368
+ # Unknown original media type.
369
+ ORIGINAL_MEDIA_TYPE_UNSPECIFIED = 0
370
+
371
+ # The speech data is an audio recording.
372
+ AUDIO = 1
373
+
374
+ # The speech data originally recorded on a video.
375
+ VIDEO = 2
376
+ end
377
+
378
+ # The type of device the speech was recorded with.
379
+ module RecordingDeviceType
380
+ # The recording device is unknown.
381
+ RECORDING_DEVICE_TYPE_UNSPECIFIED = 0
382
+
383
+ # Speech was recorded on a smartphone.
384
+ SMARTPHONE = 1
385
+
386
+ # Speech was recorded using a personal computer or tablet.
387
+ PC = 2
388
+
389
+ # Speech was recorded over a phone line.
390
+ PHONE_LINE = 3
391
+
392
+ # Speech was recorded in a vehicle.
393
+ VEHICLE = 4
394
+
395
+ # Speech was recorded outdoors.
396
+ OTHER_OUTDOOR_DEVICE = 5
397
+
398
+ # Speech was recorded indoors.
399
+ OTHER_INDOOR_DEVICE = 6
400
+ end
401
+ end
402
+
403
+ # Provides "hints" to the speech recognizer to favor specific words and phrases
404
+ # in the results.
405
+ # @!attribute [rw] phrases
406
+ # @return [Array<String>]
407
+ # *Optional* A list of strings containing words and phrases "hints" so that
408
+ # the speech recognition is more likely to recognize them. This can be used
409
+ # to improve the accuracy for specific words and phrases, for example, if
410
+ # specific commands are typically spoken by the user. This can also be used
411
+ # to add additional words to the vocabulary of the recognizer. See
412
+ # [usage limits](https://cloud.google.com/speech/limits#content).
413
+ class SpeechContext; end
414
+
415
+ # Contains audio data in the encoding specified in the +RecognitionConfig+.
416
+ # Either +content+ or +uri+ must be supplied. Supplying both or neither
417
+ # returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
418
+ # [audio limits](https://cloud.google.com/speech/limits#content).
419
+ # @!attribute [rw] content
420
+ # @return [String]
421
+ # The audio data bytes encoded as specified in
422
+ # +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
423
+ # pure binary representation, whereas JSON representations use base64.
424
+ # @!attribute [rw] uri
425
+ # @return [String]
426
+ # URI that points to a file that contains audio data bytes as specified in
427
+ # +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
428
+ # supported, which must be specified in the following format:
429
+ # +gs://bucket_name/object_name+ (other URI formats return
430
+ # {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
431
+ # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
432
+ class RecognitionAudio; end
433
+
434
+ # The only message returned to the client by the +Recognize+ method. It
435
+ # contains the result as zero or more sequential +SpeechRecognitionResult+
436
+ # messages.
437
+ # @!attribute [rw] results
438
+ # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionResult>]
439
+ # Output only. Sequential list of transcription results corresponding to
440
+ # sequential portions of audio.
441
+ class RecognizeResponse; end
442
+
443
+ # The only message returned to the client by the +LongRunningRecognize+ method.
444
+ # It contains the result as zero or more sequential +SpeechRecognitionResult+
445
+ # messages. It is included in the +result.response+ field of the +Operation+
446
+ # returned by the +GetOperation+ call of the +google::longrunning::Operations+
447
+ # service.
448
+ # @!attribute [rw] results
449
+ # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionResult>]
450
+ # Output only. Sequential list of transcription results corresponding to
451
+ # sequential portions of audio.
452
+ class LongRunningRecognizeResponse; end
453
+
454
+ # Describes the progress of a long-running +LongRunningRecognize+ call. It is
455
+ # included in the +metadata+ field of the +Operation+ returned by the
456
+ # +GetOperation+ call of the +google::longrunning::Operations+ service.
457
+ # @!attribute [rw] progress_percent
458
+ # @return [Integer]
459
+ # Approximate percentage of audio processed thus far. Guaranteed to be 100
460
+ # when the audio is fully processed and the results are available.
461
+ # @!attribute [rw] start_time
462
+ # @return [Google::Protobuf::Timestamp]
463
+ # Time when the request was received.
464
+ # @!attribute [rw] last_update_time
465
+ # @return [Google::Protobuf::Timestamp]
466
+ # Time of the most recent processing update.
467
+ class LongRunningRecognizeMetadata; end
468
+
469
+ # +StreamingRecognizeResponse+ is the only message returned to the client by
470
+ # +StreamingRecognize+. A series of zero or more +StreamingRecognizeResponse+
471
+ # messages are streamed back to the client. If there is no recognizable
472
+ # audio, and +single_utterance+ is set to false, then no messages are streamed
473
+ # back to the client.
474
+ #
475
+ # Here's an example of a series of ten +StreamingRecognizeResponse+s that might
476
+ # be returned while processing audio:
477
+ #
478
+ # 1. results { alternatives { transcript: "tube" } stability: 0.01 }
479
+ #
480
+ # 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
481
+ #
482
+ # 3. results { alternatives { transcript: "to be" } stability: 0.9 }
483
+ # results { alternatives { transcript: " or not to be" } stability: 0.01 }
484
+ #
485
+ # 4. results { alternatives { transcript: "to be or not to be"
486
+ # confidence: 0.92 }
487
+ # alternatives { transcript: "to bee or not to bee" }
488
+ # is_final: true }
489
+ #
490
+ # 5. results { alternatives { transcript: " that's" } stability: 0.01 }
491
+ #
492
+ # 6. results { alternatives { transcript: " that is" } stability: 0.9 }
493
+ # results { alternatives { transcript: " the question" } stability: 0.01 }
494
+ #
495
+ # 7. results { alternatives { transcript: " that is the question"
496
+ # confidence: 0.98 }
497
+ # alternatives { transcript: " that was the question" }
498
+ # is_final: true }
499
+ #
500
+ # Notes:
501
+ #
502
+ # * Only two of the above responses #4 and #7 contain final results; they are
503
+ # indicated by +is_final: true+. Concatenating these together generates the
504
+ # full transcript: "to be or not to be that is the question".
505
+ #
506
+ # * The others contain interim +results+. #3 and #6 contain two interim
507
+ # +results+: the first portion has a high stability and is less likely to
508
+ # change; the second portion has a low stability and is very likely to
509
+ # change. A UI designer might choose to show only high stability +results+.
510
+ #
511
+ # * The specific +stability+ and +confidence+ values shown above are only for
512
+ # illustrative purposes. Actual values may vary.
513
+ #
514
+ # * In each response, only one of these fields will be set:
515
+ # +error+,
516
+ # +speech_event_type+, or
517
+ # one or more (repeated) +results+.
518
+ # @!attribute [rw] error
519
+ # @return [Google::Rpc::Status]
520
+ # Output only. If set, returns a {Google::Rpc::Status} message that
521
+ # specifies the error for the operation.
522
+ # @!attribute [rw] results
523
+ # @return [Array<Google::Cloud::Speech::V1p1beta1::StreamingRecognitionResult>]
524
+ # Output only. This repeated list contains zero or more results that
525
+ # correspond to consecutive portions of the audio currently being processed.
526
+ # It contains zero or one +is_final=true+ result (the newly settled portion),
527
+ # followed by zero or more +is_final=false+ results (the interim results).
528
+ # @!attribute [rw] speech_event_type
529
+ # @return [Google::Cloud::Speech::V1p1beta1::StreamingRecognizeResponse::SpeechEventType]
530
+ # Output only. Indicates the type of speech event.
531
+ class StreamingRecognizeResponse
532
+ # Indicates the type of speech event.
533
+ module SpeechEventType
534
+ # No speech event specified.
535
+ SPEECH_EVENT_UNSPECIFIED = 0
536
+
537
+ # This event indicates that the server has detected the end of the user's
538
+ # speech utterance and expects no additional speech. Therefore, the server
539
+ # will not process additional audio (although it may subsequently return
540
+ # additional results). The client should stop sending additional audio
541
+ # data, half-close the gRPC connection, and wait for any additional results
542
+ # until the server closes the gRPC connection. This event is only sent if
543
+ # +single_utterance+ was set to +true+, and is not used otherwise.
544
+ END_OF_SINGLE_UTTERANCE = 1
545
+ end
546
+ end
547
+
548
+ # A streaming speech recognition result corresponding to a portion of the audio
549
+ # that is currently being processed.
550
+ # @!attribute [rw] alternatives
551
+ # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionAlternative>]
552
+ # Output only. May contain one or more recognition hypotheses (up to the
553
+ # maximum specified in +max_alternatives+).
554
+ # These alternatives are ordered in terms of accuracy, with the top (first)
555
+ # alternative being the most probable, as ranked by the recognizer.
556
+ # @!attribute [rw] is_final
557
+ # @return [true, false]
558
+ # Output only. If +false+, this +StreamingRecognitionResult+ represents an
559
+ # interim result that may change. If +true+, this is the final time the
560
+ # speech service will return this particular +StreamingRecognitionResult+,
561
+ # the recognizer will not return any further hypotheses for this portion of
562
+ # the transcript and corresponding audio.
563
+ # @!attribute [rw] stability
564
+ # @return [Float]
565
+ # Output only. An estimate of the likelihood that the recognizer will not
566
+ # change its guess about this interim result. Values range from 0.0
567
+ # (completely unstable) to 1.0 (completely stable).
568
+ # This field is only provided for interim results (+is_final=false+).
569
+ # The default of 0.0 is a sentinel value indicating +stability+ was not set.
570
+ class StreamingRecognitionResult; end
571
+
572
+ # A speech recognition result corresponding to a portion of the audio.
573
+ # @!attribute [rw] alternatives
574
+ # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionAlternative>]
575
+ # Output only. May contain one or more recognition hypotheses (up to the
576
+ # maximum specified in +max_alternatives+).
577
+ # These alternatives are ordered in terms of accuracy, with the top (first)
578
+ # alternative being the most probable, as ranked by the recognizer.
579
+ class SpeechRecognitionResult; end
580
+
581
+ # Alternative hypotheses (a.k.a. n-best list).
582
+ # @!attribute [rw] transcript
583
+ # @return [String]
584
+ # Output only. Transcript text representing the words that the user spoke.
585
+ # @!attribute [rw] confidence
586
+ # @return [Float]
587
+ # Output only. The confidence estimate between 0.0 and 1.0. A higher number
588
+ # indicates an estimated greater likelihood that the recognized words are
589
+ # correct. This field is set only for the top alternative of a non-streaming
590
+ # result or, of a streaming result where +is_final=true+.
591
+ # This field is not guaranteed to be accurate and users should not rely on it
592
+ # to be always provided.
593
+ # The default of 0.0 is a sentinel value indicating +confidence+ was not set.
594
+ # @!attribute [rw] words
595
+ # @return [Array<Google::Cloud::Speech::V1p1beta1::WordInfo>]
596
+ # Output only. A list of word-specific information for each recognized word.
597
+ # Note: When enable_speaker_diarization is true, you will see all the words
598
+ # from the beginning of the audio.
599
+ class SpeechRecognitionAlternative; end
600
+
601
+ # Word-specific information for recognized words.
602
+ # @!attribute [rw] start_time
603
+ # @return [Google::Protobuf::Duration]
604
+ # Output only. Time offset relative to the beginning of the audio,
605
+ # and corresponding to the start of the spoken word.
606
+ # This field is only set if +enable_word_time_offsets=true+ and only
607
+ # in the top hypothesis.
608
+ # This is an experimental feature and the accuracy of the time offset can
609
+ # vary.
610
+ # @!attribute [rw] end_time
611
+ # @return [Google::Protobuf::Duration]
612
+ # Output only. Time offset relative to the beginning of the audio,
613
+ # and corresponding to the end of the spoken word.
614
+ # This field is only set if +enable_word_time_offsets=true+ and only
615
+ # in the top hypothesis.
616
+ # This is an experimental feature and the accuracy of the time offset can
617
+ # vary.
618
+ # @!attribute [rw] word
619
+ # @return [String]
620
+ # Output only. The word corresponding to this set of information.
621
+ class WordInfo; end
622
+ end
623
+ end
624
+ end
625
+ end