google-cloud-speech 0.31.0 → 0.31.1
- checksums.yaml +4 -4
- data/README.md +5 -5
- data/lib/google/cloud/speech.rb +4 -4
- data/lib/google/cloud/speech/v1.rb +4 -4
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +97 -97
- data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +9 -9
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +8 -8
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +3 -3
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +11 -11
- data/lib/google/cloud/speech/v1/speech_client.rb +2 -2
- data/lib/google/cloud/speech/v1p1beta1.rb +4 -4
- data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +107 -107
- data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +9 -9
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +8 -8
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +3 -3
- data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +11 -11
- data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +2 -2
- metadata +2 -4
- data/lib/google/cloud/speech/v1/doc/overview.rb +0 -99
- data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +0 -99
@@ -21,7 +21,7 @@ module Google
 # @return [String]
 # The server-assigned name, which is only unique within the same service that
 # originally returns it. If you use the default HTTP mapping, the
-#
+# `name` should have the format of `operations/some/unique/name`.
 # @!attribute [rw] metadata
 # @return [Google::Protobuf::Any]
 # Service-specific metadata associated with the operation. It typically
@@ -30,8 +30,8 @@ module Google
 # long-running operation should document the metadata type, if any.
 # @!attribute [rw] done
 # @return [true, false]
-# If the value is
-# If true, the operation is completed, and either
+# If the value is `false`, it means the operation is still in progress.
+# If true, the operation is completed, and either `error` or `response` is
 # available.
 # @!attribute [rw] error
 # @return [Google::Rpc::Status]
@@ -39,13 +39,13 @@ module Google
 # @!attribute [rw] response
 # @return [Google::Protobuf::Any]
 # The normal response of the operation in case of success. If the original
-# method returns no data on success, such as
-#
-#
-# methods, the response should have the type
+# method returns no data on success, such as `Delete`, the response is
+# `google.protobuf.Empty`. If the original method is standard
+# `Get`/`Create`/`Update`, the response should be the resource. For other
+# methods, the response should have the type `XxxResponse`, where `Xxx`
 # is the original method name. For example, if the original method name
-# is
-#
+# is `TakeSnapshot()`, the inferred response type is
+# `TakeSnapshotResponse`.
 class Operation; end

 # The request message for {Google::Longrunning::Operations::GetOperation Operations::GetOperation}.
@@ -15,7 +15,7 @@

 module Google
 module Protobuf
-#
+# `Any` contains an arbitrary serialized protocol buffer message along with a
 # URL that describes the type of the serialized message.
 #
 # Protobuf library provides support to pack/unpack Any values in the form
@@ -69,9 +69,9 @@ module Google
 #
 # = JSON
 #
-# The JSON representation of an
+# The JSON representation of an `Any` value uses the regular
 # representation of the deserialized, embedded message, with an
-# additional field
+# additional field `@type` which contains the type URL. Example:
 #
 # package google.profile;
 # message Person {
@@ -87,7 +87,7 @@ module Google
 #
 # If the embedded message type is well-known and has a custom JSON
 # representation, that representation will be embedded adding a field
-#
+# `value` which holds the custom JSON in addition to the `@type`
 # field. Example (for message {Google::Protobuf::Duration}):
 #
 # {
@@ -99,15 +99,15 @@ module Google
 # A URL/resource name that uniquely identifies the type of the serialized
 # protocol buffer message. The last segment of the URL's path must represent
 # the fully qualified name of the type (as in
-#
+# `path/google.protobuf.Duration`). The name should be in a canonical form
 # (e.g., leading "." is not accepted).
 #
 # In practice, teams usually precompile into the binary all types that they
 # expect it to use in the context of Any. However, for URLs which use the
-# scheme
+# scheme `http`, `https`, or no scheme, one can optionally set up a type
 # server that maps type URLs to message definitions as follows:
 #
-# * If no scheme is provided,
+# * If no scheme is provided, `https` is assumed.
 # * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
 # value in binary format, or produce an error.
 # * Applications are allowed to cache lookup results based on the
@@ -120,7 +120,7 @@ module Google
 # protobuf release, and it is not used for type URLs beginning with
 # type.googleapis.com.
 #
-# Schemes other than
+# Schemes other than `http`, `https` (or the empty scheme) might be
 # used with implementation specific semantics.
 # @!attribute [rw] value
 # @return [String]
@@ -82,9 +82,9 @@ module Google
 # @return [Integer]
 # Signed fractions of a second at nanosecond resolution of the span
 # of time. Durations less than one second are represented with a 0
-#
-# of one second or more, a non-zero value for the
-# of the same sign as the
+# `seconds` field and a positive or negative `nanos` field. For durations
+# of one second or more, a non-zero value for the `nanos` field must be
+# of the same sign as the `seconds` field. Must be from -999,999,999
 # to +999,999,999 inclusive.
 class Duration; end
 end
@@ -15,7 +15,7 @@

 module Google
 module Rpc
-# The
+# The `Status` type defines a logical error model that is suitable for different
 # programming environments, including REST APIs and RPC APIs. It is used by
 # [gRPC](https://github.com/grpc). The error model is designed to be:
 #
@@ -24,7 +24,7 @@ module Google
 #
 # = Overview
 #
-# The
+# The `Status` message contains three pieces of data: error code, error message,
 # and error details. The error code should be an enum value of
 # {Google::Rpc::Code}, but it may accept additional error codes if needed. The
 # error message should be a developer-facing English message that helps
@@ -32,40 +32,40 @@ module Google
 # error message is needed, put the localized message in the error details or
 # localize it in the client. The optional error details may contain arbitrary
 # information about the error. There is a predefined set of error detail types
-# in the package
+# in the package `google.rpc` that can be used for common error conditions.
 #
 # = Language mapping
 #
-# The
-# is not necessarily the actual wire format. When the
+# The `Status` message is the logical representation of the error model, but it
+# is not necessarily the actual wire format. When the `Status` message is
 # exposed in different client libraries and different wire protocols, it can be
 # mapped differently. For example, it will likely be mapped to some exceptions
 # in Java, but more likely mapped to some error codes in C.
 #
 # = Other uses
 #
-# The error model and the
+# The error model and the `Status` message can be used in a variety of
 # environments, either with or without APIs, to provide a
 # consistent developer experience across different environments.
 #
 # Example uses of this error model include:
 #
 # * Partial errors. If a service needs to return partial errors to the client,
-# it may embed the
+# it may embed the `Status` in the normal response to indicate the partial
 # errors.
 #
 # * Workflow errors. A typical workflow has multiple steps. Each step may
-# have a
+# have a `Status` message for error reporting.
 #
 # * Batch operations. If a client uses batch request and batch response, the
-#
+# `Status` message should be used directly inside batch response, one for
 # each error sub-response.
 #
 # * Asynchronous operations. If an API call embeds asynchronous operation
 # results in its response, the status of those operations should be
-# represented directly using the
+# represented directly using the `Status` message.
 #
-# * Logging. If some API errors are stored in logs, the message
+# * Logging. If some API errors are stored in logs, the message `Status` could
 # be used directly after any stripping needed for security/privacy reasons.
 # @!attribute [rw] code
 # @return [Integer]
@@ -246,8 +246,8 @@ module Google

 # Performs asynchronous speech recognition: receive results via the
 # google.longrunning.Operations interface. Returns either an
-#
-# a
+# `Operation.error` or an `Operation.response` which contains
+# a `LongRunningRecognizeResponse` message.
 #
 # @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
 # *Required* Provides information to the recognizer that specifies how to
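The hunk above documents `long_running_recognize` returning a long-running operation whose `error` or `response` eventually carries a `LongRunningRecognizeResponse`. A minimal sketch of driving that flow from the v1 client, assuming default credentials and the Speech API enabled; the bucket URI is a placeholder and the operation helpers are assumed to follow the `Google::Gax::Operation` wrapper:

```ruby
require "google/cloud/speech"

# Sketch only: config/audio mirror the documented RecognitionConfig and
# RecognitionAudio fields; the gs:// URI is a placeholder.
speech_client = Google::Cloud::Speech.new version: :v1

config = { encoding: :FLAC, sample_rate_hertz: 16_000, language_code: "en-US" }
audio  = { uri: "gs://your-bucket/your-audio.flac" }

operation = speech_client.long_running_recognize config, audio

# Block until `done` is true; then either `error` or `response` is populated.
operation.wait_until_done!

if operation.error?
  warn operation.error.message
else
  operation.response.results.each do |result|
    puts result.alternatives.first.transcript
  end
end
```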
@@ -22,7 +22,7 @@ module Google
 # rubocop:disable LineLength

 ##
-# # Ruby Client for Cloud Speech API ([Alpha](https://github.com/
+# # Ruby Client for Cloud Speech API ([Alpha](https://github.com/googleapis/google-cloud-ruby#versioning))
 #
 # [Cloud Speech API][Product Documentation]:
 # Converts audio to text by applying powerful neural network models.
@@ -35,7 +35,7 @@ module Google
 # 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
 # 2. [Enable billing for your project.](https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project)
 # 3. [Enable the Cloud Speech API.](https://console.cloud.google.com/apis/library/speech.googleapis.com)
-# 4. [Setup Authentication.](https://
+# 4. [Setup Authentication.](https://googleapis.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
 #
 # ### Installation
 # ```
@@ -64,7 +64,7 @@ module Google
 # ### Next Steps
 # - Read the [Cloud Speech API Product documentation][Product Documentation]
 # to learn more about the product and see How-to Guides.
-# - View this [repository's main README](https://github.com/
+# - View this [repository's main README](https://github.com/googleapis/google-cloud-ruby/blob/master/README.md)
 # to see the full list of Cloud APIs that we cover.
 #
 # [Product Documentation]: https://cloud.google.com/speech
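These overview hunks only restore truncated links; for orientation, here is a hedged quickstart-style sketch of the synchronous `recognize` call this client documents, with placeholder values:

```ruby
require "google/cloud/speech"

# Sketch of a synchronous Recognize request; all values are illustrative
# and the gs:// URI is a placeholder.
speech_client = Google::Cloud::Speech.new version: :v1

config = {
  encoding:          :FLAC,
  sample_rate_hertz: 44_100,
  language_code:     "en-US"
}
audio = { uri: "gs://your-bucket/audio.flac" }

response = speech_client.recognize config, audio
response.results.each do |result|
  result.alternatives.each { |alternative| puts alternative.transcript }
end
```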
@@ -73,7 +73,7 @@ module Google
 #
 # To enable logging for this library, set the logger for the underlying [gRPC](https://github.com/grpc/grpc/tree/master/src/ruby) library.
 # The logger that you set may be a Ruby stdlib [`Logger`](https://ruby-doc.org/stdlib-2.5.0/libdoc/logger/rdoc/Logger.html) as shown below,
-# or a [`Google::Cloud::Logging::Logger`](https://
+# or a [`Google::Cloud::Logging::Logger`](https://googleapis.github.io/google-cloud-ruby/#/docs/google-cloud-logging/latest/google/cloud/logging/logger)
 # that will write logs to [Stackdriver Logging](https://cloud.google.com/logging/). See [grpc/logconfig.rb](https://github.com/grpc/grpc/blob/master/src/ruby/lib/grpc/logconfig.rb)
 # and the gRPC [spec_helper.rb](https://github.com/grpc/grpc/blob/master/src/ruby/spec/spec_helper.rb) for additional information.
 #
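The logging hunk above points at gRPC's logconfig. A sketch of the stdlib-`Logger` approach it mentions (module and constant names here are placeholders):

```ruby
require "logger"

# Route gRPC's internal logging (and therefore this library's) through a
# stdlib Logger. Names below are placeholders; see the linked
# grpc/logconfig.rb for how gRPC picks up GRPC.logger.
module MyAppGrpcLogger
  LOGGER = Logger.new $stderr, level: Logger::WARN

  def logger
    LOGGER
  end
end

module GRPC
  extend MyAppGrpcLogger
end
```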
@@ -17,7 +17,7 @@ module Google
 module Cloud
 module Speech
 module V1p1beta1
-# The top-level message sent by the client for the
+# The top-level message sent by the client for the `Recognize` method.
 # @!attribute [rw] config
 # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig]
 # *Required* Provides information to the recognizer that specifies how to
@@ -27,7 +27,7 @@ module Google
 # *Required* The audio data to be recognized.
 class RecognizeRequest; end

-# The top-level message sent by the client for the
+# The top-level message sent by the client for the `LongRunningRecognize`
 # method.
 # @!attribute [rw] config
 # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig]
@@ -38,24 +38,24 @@ module Google
 # *Required* The audio data to be recognized.
 class LongRunningRecognizeRequest; end

-# The top-level message sent by the client for the
-# Multiple
-# must contain a
-# All subsequent messages must contain
-#
+# The top-level message sent by the client for the `StreamingRecognize` method.
+# Multiple `StreamingRecognizeRequest` messages are sent. The first message
+# must contain a `streaming_config` message and must not contain `audio` data.
+# All subsequent messages must contain `audio` data and must not contain a
+# `streaming_config` message.
 # @!attribute [rw] streaming_config
 # @return [Google::Cloud::Speech::V1p1beta1::StreamingRecognitionConfig]
 # Provides information to the recognizer that specifies how to process the
-# request. The first
-#
+# request. The first `StreamingRecognizeRequest` message must contain a
+# `streaming_config` message.
 # @!attribute [rw] audio_content
 # @return [String]
 # The audio data to be recognized. Sequential chunks of audio data are sent
-# in sequential
-#
-# and all subsequent
-#
-#
+# in sequential `StreamingRecognizeRequest` messages. The first
+# `StreamingRecognizeRequest` message must not contain `audio_content` data
+# and all subsequent `StreamingRecognizeRequest` messages must contain
+# `audio_content` data. The audio bytes must be encoded as specified in
+# `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
 # pure binary representation (not base64). See
 # [audio limits](https://cloud.google.com/speech/limits#content).
 class StreamingRecognizeRequest; end
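The restored text above spells out the ordering rule for `StreamingRecognizeRequest`. A hedged sketch of building that sequence as plain hashes (field names come from the message docs above; the file path, chunk size, and how the requests are ultimately passed to the client are assumptions):

```ruby
# First request: streaming_config only, no audio.
requests = [{
  streaming_config: {
    config: { encoding: :LINEAR16, sample_rate_hertz: 16_000, language_code: "en-US" }
  }
}]

# Subsequent requests: audio_content only, encoded as declared above.
# "audio.raw" is a placeholder for already-encoded LINEAR16 audio.
File.open("audio.raw", "rb") do |file|
  while (chunk = file.read(32 * 1024))
    requests << { audio_content: chunk }
  end
end

# `requests` would then be handed to the client's streaming recognize call;
# the exact invocation depends on the client version.
```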
@@ -68,49 +68,49 @@ module Google
 # process the request.
 # @!attribute [rw] single_utterance
 # @return [true, false]
-# *Optional* If
+# *Optional* If `false` or omitted, the recognizer will perform continuous
 # recognition (continuing to wait for and process audio even if the user
 # pauses speaking) until the client closes the input stream (gRPC API) or
 # until the maximum time limit has been reached. May return multiple
-#
+# `StreamingRecognitionResult`s with the `is_final` flag set to `true`.
 #
-# If
+# If `true`, the recognizer will detect a single spoken utterance. When it
 # detects that the user has paused or stopped speaking, it will return an
-#
-# more than one
-#
+# `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no
+# more than one `StreamingRecognitionResult` with the `is_final` flag set to
+# `true`.
 # @!attribute [rw] interim_results
 # @return [true, false]
-# *Optional* If
+# *Optional* If `true`, interim results (tentative hypotheses) may be
 # returned as they become available (these interim results are indicated with
-# the
-# If
+# the `is_final=false` flag).
+# If `false` or omitted, only `is_final=true` result(s) are returned.
 class StreamingRecognitionConfig; end

 # Provides information to the recognizer that specifies how to process the
 # request.
 # @!attribute [rw] encoding
 # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding]
-# Encoding of audio data sent in all
-# This field is optional for
+# Encoding of audio data sent in all `RecognitionAudio` messages.
+# This field is optional for `FLAC` and `WAV` audio files and required
 # for all other audio formats. For details, see {Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
 # @!attribute [rw] sample_rate_hertz
 # @return [Integer]
 # Sample rate in Hertz of the audio data sent in all
-#
+# `RecognitionAudio` messages. Valid values are: 8000-48000.
 # 16000 is optimal. For best results, set the sampling rate of the audio
 # source to 16000 Hz. If that's not possible, use the native sample rate of
 # the audio source (instead of re-sampling).
-# This field is optional for
+# This field is optional for `FLAC` and `WAV` audio files and required
 # for all other audio formats. For details, see {Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
 # @!attribute [rw] audio_channel_count
 # @return [Integer]
 # *Optional* The number of channels in the input audio data.
 # ONLY set this for MULTI-CHANNEL recognition.
-# Valid values for LINEAR16 and FLAC are
+# Valid values for LINEAR16 and FLAC are `1`-`8`.
 # Valid values for OGG_OPUS are '1'-'254'.
-# Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only
-# If
+# Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
+# If `0` or omitted, defaults to one channel (mono).
 # NOTE: We only recognize the first channel by default.
 # To perform independent recognition on each channel set
 # enable_separate_recognition_per_channel to 'true'.
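Putting the streaming flags and the channel-count notes above together, a hedged example configuration (values are illustrative; `language_code` is a required field of `RecognitionConfig` even though it falls outside this excerpt):

```ruby
# Illustrative streaming configuration built from the fields documented above.
streaming_config = {
  config: {
    encoding:                                :LINEAR16, # optional for FLAC/WAV
    sample_rate_hertz:                       16_000,    # 8000-48000; 16000 is optimal
    language_code:                           "en-US",
    audio_channel_count:                     2,         # LINEAR16/FLAC accept 1-8
    enable_separate_recognition_per_channel: true       # otherwise only channel 1 is recognized
  },
  single_utterance: false, # keep recognizing until the stream is closed
  interim_results:  true   # also emit is_final=false hypotheses
}
```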
@@ -146,31 +146,31 @@ module Google
 # @!attribute [rw] max_alternatives
 # @return [Integer]
 # *Optional* Maximum number of recognition hypotheses to be returned.
-# Specifically, the maximum number of
-# within each
-# The server may return fewer than
-# Valid values are
+# Specifically, the maximum number of `SpeechRecognitionAlternative` messages
+# within each `SpeechRecognitionResult`.
+# The server may return fewer than `max_alternatives`.
+# Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
 # one. If omitted, will return a maximum of one.
 # @!attribute [rw] profanity_filter
 # @return [true, false]
-# *Optional* If set to
+# *Optional* If set to `true`, the server will attempt to filter out
 # profanities, replacing all but the initial character in each filtered word
-# with asterisks, e.g. "f***". If set to
+# with asterisks, e.g. "f***". If set to `false` or omitted, profanities
 # won't be filtered out.
 # @!attribute [rw] speech_contexts
 # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechContext>]
 # *Optional* A means to provide context to assist the speech recognition.
 # @!attribute [rw] enable_word_time_offsets
 # @return [true, false]
-# *Optional* If
+# *Optional* If `true`, the top result includes a list of words and
 # the start and end time offsets (timestamps) for those words. If
-#
-#
+# `false`, no word-level time offset information is returned. The default is
+# `false`.
 # @!attribute [rw] enable_word_confidence
 # @return [true, false]
-# *Optional* If
-# confidence for those words. If
-# information is returned. The default is
+# *Optional* If `true`, the top result includes a list of words and the
+# confidence for those words. If `false`, no word-level confidence
+# information is returned. The default is `false`.
 # @!attribute [rw] enable_automatic_punctuation
 # @return [true, false]
 # *Optional* If 'true', adds punctuation to recognition result hypotheses.
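A hedged example of setting the optional flags documented in this hunk (values illustrative; `encoding`, `sample_rate_hertz`, and `language_code` as in the earlier sketches):

```ruby
# Illustrative RecognitionConfig exercising the optional flags above.
config = {
  encoding:                 :FLAC,
  sample_rate_hertz:        16_000,
  language_code:            "en-US",
  max_alternatives:         3,     # up to 30; 0 or 1 yields a single hypothesis
  profanity_filter:         true,  # masks all but the first character, e.g. "f***"
  enable_word_time_offsets: true,  # adds per-word start_time/end_time
  enable_word_confidence:   true   # adds per-word confidence
}
```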
@@ -235,15 +235,15 @@ module Google
 # @!attribute [rw] use_enhanced
 # @return [true, false]
 # *Optional* Set to true to use an enhanced model for speech recognition.
-# You must also set the
-#
-#
+# You must also set the `model` field to a valid, enhanced model. If
+# `use_enhanced` is set to true and the `model` field is not set, then
+# `use_enhanced` is ignored. If `use_enhanced` is true and an enhanced
 # version of the specified model does not exist, then the speech is
 # recognized using the standard version of the specified model.
 #
 # Enhanced speech models require that you opt-in to the audio logging using
 # instructions in the [alpha documentation](https://cloud.google.com/speech/data-sharing). If you set
-#
+# `use_enhanced` to true and you have not enabled audio logging, then you
 # will receive an error.
 class RecognitionConfig
 # The encoding of the audio data sent in the request.
@@ -251,18 +251,18 @@ module Google
 # All encodings support only 1 channel (mono) audio.
 #
 # For best results, the audio source should be captured and transmitted using
-# a lossless encoding (
+# a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
 # recognition can be reduced if lossy codecs are used to capture or transmit
 # audio, particularly if background noise is present. Lossy codecs include
-#
+# `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, and `SPEEX_WITH_HEADER_BYTE`.
 #
-# The
-# included audio content. You can request recognition for
-# contain either
-# If you send
-# your request, you do not need to specify an
+# The `FLAC` and `WAV` audio file formats include a header that describes the
+# included audio content. You can request recognition for `WAV` files that
+# contain either `LINEAR16` or `MULAW` encoded audio.
+# If you send `FLAC` or `WAV` audio file format in
+# your request, you do not need to specify an `AudioEncoding`; the audio
 # encoding format is determined from the file header. If you specify
-# an
+# an `AudioEncoding` when you send send `FLAC` or `WAV` audio, the
 # encoding configuration must match the encoding described in the audio
 # header; otherwise the request returns an
 # {Google::Rpc::Code::INVALID_ARGUMENT} error code.
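As a hedged illustration of the `FLAC`/`WAV` header behavior described above (placeholder URI; the `recognize` call as in the earlier sketches):

```ruby
# For FLAC/WAV the encoding (and sample rate) can be read from the file
# header, so they may be omitted from the config.
config = { language_code: "en-US" }                # no :encoding needed here
audio  = { uri: "gs://your-bucket/recording.wav" } # placeholder URI

response = speech_client.recognize config, audio
# If :encoding is supplied anyway, it must match the file header or the
# request fails with INVALID_ARGUMENT.
```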
@@ -273,33 +273,33 @@ module Google
 # Uncompressed 16-bit signed little-endian samples (Linear PCM).
 LINEAR16 = 1

-#
+# `FLAC` (Free Lossless Audio
 # Codec) is the recommended encoding because it is
 # lossless--therefore recognition is not compromised--and
-# requires only about half the bandwidth of
+# requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
 # encoding supports 16-bit and 24-bit samples, however, not all fields in
-#
+# `STREAMINFO` are supported.
 FLAC = 2

 # 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
 MULAW = 3

-# Adaptive Multi-Rate Narrowband codec.
+# Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
 AMR = 4

-# Adaptive Multi-Rate Wideband codec.
+# Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
 AMR_WB = 5

 # Opus encoded audio frames in Ogg container
 # ([OggOpus](https://wiki.xiph.org/OggOpus)).
-#
+# `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.
 OGG_OPUS = 6

 # Although the use of lossy encodings is not recommended, if a very low
-# bitrate encoding is required,
+# bitrate encoding is required, `OGG_OPUS` is highly preferred over
 # Speex encoding. The [Speex](https://speex.org/) encoding supported by
 # Cloud Speech API has a header byte in each block, as in MIME type
-#
+# `audio/x-speex-with-header-byte`.
 # It is a variant of the RTP Speex encoding defined in
 # [RFC 5574](https://tools.ietf.org/html/rfc5574).
 # The stream is a sequence of blocks, one block per RTP packet. Each block
@@ -307,7 +307,7 @@ module Google
 # by one or more frames of Speex data, padded to an integral number of
 # bytes (octets) as specified in RFC 5574. In other words, each RTP header
 # is replaced with a single byte containing the block length. Only Speex
-# wideband is supported.
+# wideband is supported. `sample_rate_hertz` must be 16000.
 SPEEX_WITH_HEADER_BYTE = 7
 end
 end
@@ -338,8 +338,8 @@ module Google
 # 'Cardioid Microphone'.
 # @!attribute [rw] original_mime_type
 # @return [String]
-# Mime type of the original audio file. For example
-#
+# Mime type of the original audio file. For example `audio/m4a`,
+# `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
 # A list of possible audio mime types is maintained at
 # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
 # @!attribute [rw] obfuscated_id
@@ -455,27 +455,27 @@ module Google
 # [usage limits](https://cloud.google.com/speech/limits#content).
 class SpeechContext; end

-# Contains audio data in the encoding specified in the
-# Either
+# Contains audio data in the encoding specified in the `RecognitionConfig`.
+# Either `content` or `uri` must be supplied. Supplying both or neither
 # returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
 # [audio limits](https://cloud.google.com/speech/limits#content).
 # @!attribute [rw] content
 # @return [String]
 # The audio data bytes encoded as specified in
-#
+# `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
 # pure binary representation, whereas JSON representations use base64.
 # @!attribute [rw] uri
 # @return [String]
 # URI that points to a file that contains audio data bytes as specified in
-#
+# `RecognitionConfig`. Currently, only Google Cloud Storage URIs are
 # supported, which must be specified in the following format:
-#
+# `gs://bucket_name/object_name` (other URI formats return
 # {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
 # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
 class RecognitionAudio; end

-# The only message returned to the client by the
-# contains the result as zero or more sequential
+# The only message returned to the client by the `Recognize` method. It
+# contains the result as zero or more sequential `SpeechRecognitionResult`
 # messages.
 # @!attribute [rw] results
 # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionResult>]
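The `RecognitionAudio` docs above require exactly one of `content` or `uri`. Two illustrative hashes (file name and bucket are placeholders):

```ruby
# Inline bytes: raw binary, not base64, per the note above.
audio_from_bytes = { content: File.binread("local-audio.flac") }

# Cloud Storage object in gs://bucket_name/object_name form.
audio_from_uri = { uri: "gs://your-bucket/remote-audio.flac" }

# Supplying both content and uri (or neither) returns INVALID_ARGUMENT,
# so pass exactly one of these to recognize / long_running_recognize.
```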
@@ -483,10 +483,10 @@ module Google
 # sequential portions of audio.
 class RecognizeResponse; end

-# The only message returned to the client by the
-# It contains the result as zero or more sequential
-# messages. It is included in the
-# returned by the
+# The only message returned to the client by the `LongRunningRecognize` method.
+# It contains the result as zero or more sequential `SpeechRecognitionResult`
+# messages. It is included in the `result.response` field of the `Operation`
+# returned by the `GetOperation` call of the `google::longrunning::Operations`
 # service.
 # @!attribute [rw] results
 # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionResult>]
@@ -494,9 +494,9 @@ module Google
 # sequential portions of audio.
 class LongRunningRecognizeResponse; end

-# Describes the progress of a long-running
-# included in the
-#
+# Describes the progress of a long-running `LongRunningRecognize` call. It is
+# included in the `metadata` field of the `Operation` returned by the
+# `GetOperation` call of the `google::longrunning::Operations` service.
 # @!attribute [rw] progress_percent
 # @return [Integer]
 # Approximate percentage of audio processed thus far. Guaranteed to be 100
@@ -509,13 +509,13 @@ module Google
 # Time of the most recent processing update.
 class LongRunningRecognizeMetadata; end

-#
-#
+# `StreamingRecognizeResponse` is the only message returned to the client by
+# `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
 # messages are streamed back to the client. If there is no recognizable
-# audio, and
+# audio, and `single_utterance` is set to false, then no messages are streamed
 # back to the client.
 #
-# Here's an example of a series of ten
+# Here's an example of a series of ten `StreamingRecognizeResponse`s that might
 # be returned while processing audio:
 #
 # 1. results { alternatives { transcript: "tube" } stability: 0.01 }
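Since `LongRunningRecognizeMetadata` (documented a hunk above) travels in the operation's `metadata`, progress can be observed while waiting. A hedged sketch, reusing `config`/`audio` from the earlier sketches; the `done?`/`reload!`/`metadata` helpers are assumptions based on the `Google::Gax::Operation` wrapper this client returns:

```ruby
operation = speech_client.long_running_recognize config, audio

until operation.done?
  sleep 5
  operation.reload! # refreshes the operation via GetOperation
  metadata = operation.metadata
  puts "progress: #{metadata.progress_percent}%" if metadata
end
```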
@@ -543,21 +543,21 @@ module Google
 # Notes:
 #
 # * Only two of the above responses #4 and #7 contain final results; they are
-# indicated by
+# indicated by `is_final: true`. Concatenating these together generates the
 # full transcript: "to be or not to be that is the question".
 #
-# * The others contain interim
-#
+# * The others contain interim `results`. #3 and #6 contain two interim
+# `results`: the first portion has a high stability and is less likely to
 # change; the second portion has a low stability and is very likely to
-# change. A UI designer might choose to show only high stability
+# change. A UI designer might choose to show only high stability `results`.
 #
-# * The specific
+# * The specific `stability` and `confidence` values shown above are only for
 # illustrative purposes. Actual values may vary.
 #
 # * In each response, only one of these fields will be set:
-#
-#
-# one or more (repeated)
+# `error`,
+# `speech_event_type`, or
+# one or more (repeated) `results`.
 # @!attribute [rw] error
 # @return [Google::Rpc::Status]
 # Output only. If set, returns a {Google::Rpc::Status} message that
@@ -566,8 +566,8 @@ module Google
 # @return [Array<Google::Cloud::Speech::V1p1beta1::StreamingRecognitionResult>]
 # Output only. This repeated list contains zero or more results that
 # correspond to consecutive portions of the audio currently being processed.
-# It contains zero or one
-# followed by zero or more
+# It contains zero or one `is_final=true` result (the newly settled portion),
+# followed by zero or more `is_final=false` results (the interim results).
 # @!attribute [rw] speech_event_type
 # @return [Google::Cloud::Speech::V1p1beta1::StreamingRecognizeResponse::SpeechEventType]
 # Output only. Indicates the type of speech event.
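A hedged sketch of consuming these responses and assembling the full transcript from the `is_final` results, as the notes two hunks up describe; `responses` stands for whatever enumerable the streaming call yields:

```ruby
final_chunks = []

responses.each do |response|
  # Per the docs, each response carries only one of error, speech_event_type,
  # or results.
  warn response.error.message if response.error

  response.results.each do |result|
    next unless result.is_final
    final_chunks << result.alternatives.first.transcript
  end
end

puts final_chunks.join
```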
@@ -583,7 +583,7 @@ module Google
 # additional results). The client should stop sending additional audio
 # data, half-close the gRPC connection, and wait for any additional results
 # until the server closes the gRPC connection. This event is only sent if
-#
+# `single_utterance` was set to `true`, and is not used otherwise.
 END_OF_SINGLE_UTTERANCE = 1
 end
 end
@@ -593,14 +593,14 @@ module Google
 # @!attribute [rw] alternatives
 # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionAlternative>]
 # Output only. May contain one or more recognition hypotheses (up to the
-# maximum specified in
+# maximum specified in `max_alternatives`).
 # These alternatives are ordered in terms of accuracy, with the top (first)
 # alternative being the most probable, as ranked by the recognizer.
 # @!attribute [rw] is_final
 # @return [true, false]
-# Output only. If
-# interim result that may change. If
-# speech service will return this particular
+# Output only. If `false`, this `StreamingRecognitionResult` represents an
+# interim result that may change. If `true`, this is the final time the
+# speech service will return this particular `StreamingRecognitionResult`,
 # the recognizer will not return any further hypotheses for this portion of
 # the transcript and corresponding audio.
 # @!attribute [rw] stability
@@ -608,8 +608,8 @@ module Google
 # Output only. An estimate of the likelihood that the recognizer will not
 # change its guess about this interim result. Values range from 0.0
 # (completely unstable) to 1.0 (completely stable).
-# This field is only provided for interim results (
-# The default of 0.0 is a sentinel value indicating
+# This field is only provided for interim results (`is_final=false`).
+# The default of 0.0 is a sentinel value indicating `stability` was not set.
 # @!attribute [rw] channel_tag
 # @return [Integer]
 # For multi-channel audio, this is the channel number corresponding to the
@@ -627,7 +627,7 @@ module Google
 # @!attribute [rw] alternatives
 # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionAlternative>]
 # Output only. May contain one or more recognition hypotheses (up to the
-# maximum specified in
+# maximum specified in `max_alternatives`).
 # These alternatives are ordered in terms of accuracy, with the top (first)
 # alternative being the most probable, as ranked by the recognizer.
 # @!attribute [rw] channel_tag
@@ -652,10 +652,10 @@ module Google
 # Output only. The confidence estimate between 0.0 and 1.0. A higher number
 # indicates an estimated greater likelihood that the recognized words are
 # correct. This field is set only for the top alternative of a non-streaming
-# result or, of a streaming result where
+# result or, of a streaming result where `is_final=true`.
 # This field is not guaranteed to be accurate and users should not rely on it
 # to be always provided.
-# The default of 0.0 is a sentinel value indicating
+# The default of 0.0 is a sentinel value indicating `confidence` was not set.
 # @!attribute [rw] words
 # @return [Array<Google::Cloud::Speech::V1p1beta1::WordInfo>]
 # Output only. A list of word-specific information for each recognized word.
@@ -668,7 +668,7 @@ module Google
 # @return [Google::Protobuf::Duration]
 # Output only. Time offset relative to the beginning of the audio,
 # and corresponding to the start of the spoken word.
-# This field is only set if
+# This field is only set if `enable_word_time_offsets=true` and only
 # in the top hypothesis.
 # This is an experimental feature and the accuracy of the time offset can
 # vary.
@@ -676,7 +676,7 @@ module Google
 # @return [Google::Protobuf::Duration]
 # Output only. Time offset relative to the beginning of the audio,
 # and corresponding to the end of the spoken word.
-# This field is only set if
+# This field is only set if `enable_word_time_offsets=true` and only
 # in the top hypothesis.
 # This is an experimental feature and the accuracy of the time offset can
 # vary.
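With `enable_word_time_offsets: true`, each word in the top alternative carries `start_time`/`end_time` as `Google::Protobuf::Duration` values. A hedged sketch of reading them; `result` stands for a `SpeechRecognitionResult` from one of the calls above, and the `word` field name follows the `WordInfo` message:

```ruby
top = result.alternatives.first

top.words.each do |info|
  start_s = info.start_time.seconds + info.start_time.nanos / 1e9
  end_s   = info.end_time.seconds + info.end_time.nanos / 1e9
  puts format("%-15s %.2fs to %.2fs", info.word, start_s, end_s)
end
```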
@@ -688,10 +688,10 @@ module Google
 # Output only. The confidence estimate between 0.0 and 1.0. A higher number
 # indicates an estimated greater likelihood that the recognized words are
 # correct. This field is set only for the top alternative of a non-streaming
-# result or, of a streaming result where
+# result or, of a streaming result where `is_final=true`.
 # This field is not guaranteed to be accurate and users should not rely on it
 # to be always provided.
-# The default of 0.0 is a sentinel value indicating
+# The default of 0.0 is a sentinel value indicating `confidence` was not set.
 # @!attribute [rw] speaker_tag
 # @return [Integer]
 # Output only. A distinct integer value is assigned for every speaker within
|