google-cloud-speech 0.31.0 → 0.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -5
- data/lib/google/cloud/speech.rb +4 -4
- data/lib/google/cloud/speech/v1.rb +4 -4
- data/lib/google/cloud/speech/v1/doc/google/cloud/speech/v1/cloud_speech.rb +97 -97
- data/lib/google/cloud/speech/v1/doc/google/longrunning/operations.rb +9 -9
- data/lib/google/cloud/speech/v1/doc/google/protobuf/any.rb +8 -8
- data/lib/google/cloud/speech/v1/doc/google/protobuf/duration.rb +3 -3
- data/lib/google/cloud/speech/v1/doc/google/rpc/status.rb +11 -11
- data/lib/google/cloud/speech/v1/speech_client.rb +2 -2
- data/lib/google/cloud/speech/v1p1beta1.rb +4 -4
- data/lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb +107 -107
- data/lib/google/cloud/speech/v1p1beta1/doc/google/longrunning/operations.rb +9 -9
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/any.rb +8 -8
- data/lib/google/cloud/speech/v1p1beta1/doc/google/protobuf/duration.rb +3 -3
- data/lib/google/cloud/speech/v1p1beta1/doc/google/rpc/status.rb +11 -11
- data/lib/google/cloud/speech/v1p1beta1/speech_client.rb +2 -2
- metadata +2 -4
- data/lib/google/cloud/speech/v1/doc/overview.rb +0 -99
- data/lib/google/cloud/speech/v1p1beta1/doc/overview.rb +0 -99
|
@@ -21,7 +21,7 @@ module Google
|
|
|
21
21
|
# @return [String]
|
|
22
22
|
# The server-assigned name, which is only unique within the same service that
|
|
23
23
|
# originally returns it. If you use the default HTTP mapping, the
|
|
24
|
-
#
|
|
24
|
+
# `name` should have the format of `operations/some/unique/name`.
|
|
25
25
|
# @!attribute [rw] metadata
|
|
26
26
|
# @return [Google::Protobuf::Any]
|
|
27
27
|
# Service-specific metadata associated with the operation. It typically
|
|
@@ -30,8 +30,8 @@ module Google
|
|
|
30
30
|
# long-running operation should document the metadata type, if any.
|
|
31
31
|
# @!attribute [rw] done
|
|
32
32
|
# @return [true, false]
|
|
33
|
-
# If the value is
|
|
34
|
-
# If true, the operation is completed, and either
|
|
33
|
+
# If the value is `false`, it means the operation is still in progress.
|
|
34
|
+
# If true, the operation is completed, and either `error` or `response` is
|
|
35
35
|
# available.
|
|
36
36
|
# @!attribute [rw] error
|
|
37
37
|
# @return [Google::Rpc::Status]
|
|
@@ -39,13 +39,13 @@ module Google
|
|
|
39
39
|
# @!attribute [rw] response
|
|
40
40
|
# @return [Google::Protobuf::Any]
|
|
41
41
|
# The normal response of the operation in case of success. If the original
|
|
42
|
-
# method returns no data on success, such as
|
|
43
|
-
#
|
|
44
|
-
#
|
|
45
|
-
# methods, the response should have the type
|
|
42
|
+
# method returns no data on success, such as `Delete`, the response is
|
|
43
|
+
# `google.protobuf.Empty`. If the original method is standard
|
|
44
|
+
# `Get`/`Create`/`Update`, the response should be the resource. For other
|
|
45
|
+
# methods, the response should have the type `XxxResponse`, where `Xxx`
|
|
46
46
|
# is the original method name. For example, if the original method name
|
|
47
|
-
# is
|
|
48
|
-
#
|
|
47
|
+
# is `TakeSnapshot()`, the inferred response type is
|
|
48
|
+
# `TakeSnapshotResponse`.
|
|
49
49
|
class Operation; end
|
|
50
50
|
|
|
51
51
|
# The request message for {Google::Longrunning::Operations::GetOperation Operations::GetOperation}.
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
module Google
|
|
17
17
|
module Protobuf
|
|
18
|
-
#
|
|
18
|
+
# `Any` contains an arbitrary serialized protocol buffer message along with a
|
|
19
19
|
# URL that describes the type of the serialized message.
|
|
20
20
|
#
|
|
21
21
|
# Protobuf library provides support to pack/unpack Any values in the form
|
|
@@ -69,9 +69,9 @@ module Google
|
|
|
69
69
|
#
|
|
70
70
|
# = JSON
|
|
71
71
|
#
|
|
72
|
-
# The JSON representation of an
|
|
72
|
+
# The JSON representation of an `Any` value uses the regular
|
|
73
73
|
# representation of the deserialized, embedded message, with an
|
|
74
|
-
# additional field
|
|
74
|
+
# additional field `@type` which contains the type URL. Example:
|
|
75
75
|
#
|
|
76
76
|
# package google.profile;
|
|
77
77
|
# message Person {
|
|
@@ -87,7 +87,7 @@ module Google
|
|
|
87
87
|
#
|
|
88
88
|
# If the embedded message type is well-known and has a custom JSON
|
|
89
89
|
# representation, that representation will be embedded adding a field
|
|
90
|
-
#
|
|
90
|
+
# `value` which holds the custom JSON in addition to the `@type`
|
|
91
91
|
# field. Example (for message {Google::Protobuf::Duration}):
|
|
92
92
|
#
|
|
93
93
|
# {
|
|
@@ -99,15 +99,15 @@ module Google
|
|
|
99
99
|
# A URL/resource name that uniquely identifies the type of the serialized
|
|
100
100
|
# protocol buffer message. The last segment of the URL's path must represent
|
|
101
101
|
# the fully qualified name of the type (as in
|
|
102
|
-
#
|
|
102
|
+
# `path/google.protobuf.Duration`). The name should be in a canonical form
|
|
103
103
|
# (e.g., leading "." is not accepted).
|
|
104
104
|
#
|
|
105
105
|
# In practice, teams usually precompile into the binary all types that they
|
|
106
106
|
# expect it to use in the context of Any. However, for URLs which use the
|
|
107
|
-
# scheme
|
|
107
|
+
# scheme `http`, `https`, or no scheme, one can optionally set up a type
|
|
108
108
|
# server that maps type URLs to message definitions as follows:
|
|
109
109
|
#
|
|
110
|
-
# * If no scheme is provided,
|
|
110
|
+
# * If no scheme is provided, `https` is assumed.
|
|
111
111
|
# * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
|
|
112
112
|
# value in binary format, or produce an error.
|
|
113
113
|
# * Applications are allowed to cache lookup results based on the
|
|
@@ -120,7 +120,7 @@ module Google
|
|
|
120
120
|
# protobuf release, and it is not used for type URLs beginning with
|
|
121
121
|
# type.googleapis.com.
|
|
122
122
|
#
|
|
123
|
-
# Schemes other than
|
|
123
|
+
# Schemes other than `http`, `https` (or the empty scheme) might be
|
|
124
124
|
# used with implementation specific semantics.
|
|
125
125
|
# @!attribute [rw] value
|
|
126
126
|
# @return [String]
|
|
@@ -82,9 +82,9 @@ module Google
|
|
|
82
82
|
# @return [Integer]
|
|
83
83
|
# Signed fractions of a second at nanosecond resolution of the span
|
|
84
84
|
# of time. Durations less than one second are represented with a 0
|
|
85
|
-
#
|
|
86
|
-
# of one second or more, a non-zero value for the
|
|
87
|
-
# of the same sign as the
|
|
85
|
+
# `seconds` field and a positive or negative `nanos` field. For durations
|
|
86
|
+
# of one second or more, a non-zero value for the `nanos` field must be
|
|
87
|
+
# of the same sign as the `seconds` field. Must be from -999,999,999
|
|
88
88
|
# to +999,999,999 inclusive.
|
|
89
89
|
class Duration; end
|
|
90
90
|
end
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
module Google
|
|
17
17
|
module Rpc
|
|
18
|
-
# The
|
|
18
|
+
# The `Status` type defines a logical error model that is suitable for different
|
|
19
19
|
# programming environments, including REST APIs and RPC APIs. It is used by
|
|
20
20
|
# [gRPC](https://github.com/grpc). The error model is designed to be:
|
|
21
21
|
#
|
|
@@ -24,7 +24,7 @@ module Google
|
|
|
24
24
|
#
|
|
25
25
|
# = Overview
|
|
26
26
|
#
|
|
27
|
-
# The
|
|
27
|
+
# The `Status` message contains three pieces of data: error code, error message,
|
|
28
28
|
# and error details. The error code should be an enum value of
|
|
29
29
|
# {Google::Rpc::Code}, but it may accept additional error codes if needed. The
|
|
30
30
|
# error message should be a developer-facing English message that helps
|
|
@@ -32,40 +32,40 @@ module Google
|
|
|
32
32
|
# error message is needed, put the localized message in the error details or
|
|
33
33
|
# localize it in the client. The optional error details may contain arbitrary
|
|
34
34
|
# information about the error. There is a predefined set of error detail types
|
|
35
|
-
# in the package
|
|
35
|
+
# in the package `google.rpc` that can be used for common error conditions.
|
|
36
36
|
#
|
|
37
37
|
# = Language mapping
|
|
38
38
|
#
|
|
39
|
-
# The
|
|
40
|
-
# is not necessarily the actual wire format. When the
|
|
39
|
+
# The `Status` message is the logical representation of the error model, but it
|
|
40
|
+
# is not necessarily the actual wire format. When the `Status` message is
|
|
41
41
|
# exposed in different client libraries and different wire protocols, it can be
|
|
42
42
|
# mapped differently. For example, it will likely be mapped to some exceptions
|
|
43
43
|
# in Java, but more likely mapped to some error codes in C.
|
|
44
44
|
#
|
|
45
45
|
# = Other uses
|
|
46
46
|
#
|
|
47
|
-
# The error model and the
|
|
47
|
+
# The error model and the `Status` message can be used in a variety of
|
|
48
48
|
# environments, either with or without APIs, to provide a
|
|
49
49
|
# consistent developer experience across different environments.
|
|
50
50
|
#
|
|
51
51
|
# Example uses of this error model include:
|
|
52
52
|
#
|
|
53
53
|
# * Partial errors. If a service needs to return partial errors to the client,
|
|
54
|
-
# it may embed the
|
|
54
|
+
# it may embed the `Status` in the normal response to indicate the partial
|
|
55
55
|
# errors.
|
|
56
56
|
#
|
|
57
57
|
# * Workflow errors. A typical workflow has multiple steps. Each step may
|
|
58
|
-
# have a
|
|
58
|
+
# have a `Status` message for error reporting.
|
|
59
59
|
#
|
|
60
60
|
# * Batch operations. If a client uses batch request and batch response, the
|
|
61
|
-
#
|
|
61
|
+
# `Status` message should be used directly inside batch response, one for
|
|
62
62
|
# each error sub-response.
|
|
63
63
|
#
|
|
64
64
|
# * Asynchronous operations. If an API call embeds asynchronous operation
|
|
65
65
|
# results in its response, the status of those operations should be
|
|
66
|
-
# represented directly using the
|
|
66
|
+
# represented directly using the `Status` message.
|
|
67
67
|
#
|
|
68
|
-
# * Logging. If some API errors are stored in logs, the message
|
|
68
|
+
# * Logging. If some API errors are stored in logs, the message `Status` could
|
|
69
69
|
# be used directly after any stripping needed for security/privacy reasons.
|
|
70
70
|
# @!attribute [rw] code
|
|
71
71
|
# @return [Integer]
|
|
@@ -246,8 +246,8 @@ module Google
|
|
|
246
246
|
|
|
247
247
|
# Performs asynchronous speech recognition: receive results via the
|
|
248
248
|
# google.longrunning.Operations interface. Returns either an
|
|
249
|
-
#
|
|
250
|
-
# a
|
|
249
|
+
# `Operation.error` or an `Operation.response` which contains
|
|
250
|
+
# a `LongRunningRecognizeResponse` message.
|
|
251
251
|
#
|
|
252
252
|
# @param config [Google::Cloud::Speech::V1::RecognitionConfig | Hash]
|
|
253
253
|
# *Required* Provides information to the recognizer that specifies how to
|
|
@@ -22,7 +22,7 @@ module Google
|
|
|
22
22
|
# rubocop:disable LineLength
|
|
23
23
|
|
|
24
24
|
##
|
|
25
|
-
# # Ruby Client for Cloud Speech API ([Alpha](https://github.com/
|
|
25
|
+
# # Ruby Client for Cloud Speech API ([Alpha](https://github.com/googleapis/google-cloud-ruby#versioning))
|
|
26
26
|
#
|
|
27
27
|
# [Cloud Speech API][Product Documentation]:
|
|
28
28
|
# Converts audio to text by applying powerful neural network models.
|
|
@@ -35,7 +35,7 @@ module Google
|
|
|
35
35
|
# 1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project)
|
|
36
36
|
# 2. [Enable billing for your project.](https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project)
|
|
37
37
|
# 3. [Enable the Cloud Speech API.](https://console.cloud.google.com/apis/library/speech.googleapis.com)
|
|
38
|
-
# 4. [Setup Authentication.](https://
|
|
38
|
+
# 4. [Setup Authentication.](https://googleapis.github.io/google-cloud-ruby/#/docs/google-cloud/master/guides/authentication)
|
|
39
39
|
#
|
|
40
40
|
# ### Installation
|
|
41
41
|
# ```
|
|
@@ -64,7 +64,7 @@ module Google
|
|
|
64
64
|
# ### Next Steps
|
|
65
65
|
# - Read the [Cloud Speech API Product documentation][Product Documentation]
|
|
66
66
|
# to learn more about the product and see How-to Guides.
|
|
67
|
-
# - View this [repository's main README](https://github.com/
|
|
67
|
+
# - View this [repository's main README](https://github.com/googleapis/google-cloud-ruby/blob/master/README.md)
|
|
68
68
|
# to see the full list of Cloud APIs that we cover.
|
|
69
69
|
#
|
|
70
70
|
# [Product Documentation]: https://cloud.google.com/speech
|
|
@@ -73,7 +73,7 @@ module Google
|
|
|
73
73
|
#
|
|
74
74
|
# To enable logging for this library, set the logger for the underlying [gRPC](https://github.com/grpc/grpc/tree/master/src/ruby) library.
|
|
75
75
|
# The logger that you set may be a Ruby stdlib [`Logger`](https://ruby-doc.org/stdlib-2.5.0/libdoc/logger/rdoc/Logger.html) as shown below,
|
|
76
|
-
# or a [`Google::Cloud::Logging::Logger`](https://
|
|
76
|
+
# or a [`Google::Cloud::Logging::Logger`](https://googleapis.github.io/google-cloud-ruby/#/docs/google-cloud-logging/latest/google/cloud/logging/logger)
|
|
77
77
|
# that will write logs to [Stackdriver Logging](https://cloud.google.com/logging/). See [grpc/logconfig.rb](https://github.com/grpc/grpc/blob/master/src/ruby/lib/grpc/logconfig.rb)
|
|
78
78
|
# and the gRPC [spec_helper.rb](https://github.com/grpc/grpc/blob/master/src/ruby/spec/spec_helper.rb) for additional information.
|
|
79
79
|
#
|
|
@@ -17,7 +17,7 @@ module Google
|
|
|
17
17
|
module Cloud
|
|
18
18
|
module Speech
|
|
19
19
|
module V1p1beta1
|
|
20
|
-
# The top-level message sent by the client for the
|
|
20
|
+
# The top-level message sent by the client for the `Recognize` method.
|
|
21
21
|
# @!attribute [rw] config
|
|
22
22
|
# @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig]
|
|
23
23
|
# *Required* Provides information to the recognizer that specifies how to
|
|
@@ -27,7 +27,7 @@ module Google
|
|
|
27
27
|
# *Required* The audio data to be recognized.
|
|
28
28
|
class RecognizeRequest; end
|
|
29
29
|
|
|
30
|
-
# The top-level message sent by the client for the
|
|
30
|
+
# The top-level message sent by the client for the `LongRunningRecognize`
|
|
31
31
|
# method.
|
|
32
32
|
# @!attribute [rw] config
|
|
33
33
|
# @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig]
|
|
@@ -38,24 +38,24 @@ module Google
|
|
|
38
38
|
# *Required* The audio data to be recognized.
|
|
39
39
|
class LongRunningRecognizeRequest; end
|
|
40
40
|
|
|
41
|
-
# The top-level message sent by the client for the
|
|
42
|
-
# Multiple
|
|
43
|
-
# must contain a
|
|
44
|
-
# All subsequent messages must contain
|
|
45
|
-
#
|
|
41
|
+
# The top-level message sent by the client for the `StreamingRecognize` method.
|
|
42
|
+
# Multiple `StreamingRecognizeRequest` messages are sent. The first message
|
|
43
|
+
# must contain a `streaming_config` message and must not contain `audio` data.
|
|
44
|
+
# All subsequent messages must contain `audio` data and must not contain a
|
|
45
|
+
# `streaming_config` message.
|
|
46
46
|
# @!attribute [rw] streaming_config
|
|
47
47
|
# @return [Google::Cloud::Speech::V1p1beta1::StreamingRecognitionConfig]
|
|
48
48
|
# Provides information to the recognizer that specifies how to process the
|
|
49
|
-
# request. The first
|
|
50
|
-
#
|
|
49
|
+
# request. The first `StreamingRecognizeRequest` message must contain a
|
|
50
|
+
# `streaming_config` message.
|
|
51
51
|
# @!attribute [rw] audio_content
|
|
52
52
|
# @return [String]
|
|
53
53
|
# The audio data to be recognized. Sequential chunks of audio data are sent
|
|
54
|
-
# in sequential
|
|
55
|
-
#
|
|
56
|
-
# and all subsequent
|
|
57
|
-
#
|
|
58
|
-
#
|
|
54
|
+
# in sequential `StreamingRecognizeRequest` messages. The first
|
|
55
|
+
# `StreamingRecognizeRequest` message must not contain `audio_content` data
|
|
56
|
+
# and all subsequent `StreamingRecognizeRequest` messages must contain
|
|
57
|
+
# `audio_content` data. The audio bytes must be encoded as specified in
|
|
58
|
+
# `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
|
|
59
59
|
# pure binary representation (not base64). See
|
|
60
60
|
# [audio limits](https://cloud.google.com/speech/limits#content).
|
|
61
61
|
class StreamingRecognizeRequest; end
|
|
@@ -68,49 +68,49 @@ module Google
|
|
|
68
68
|
# process the request.
|
|
69
69
|
# @!attribute [rw] single_utterance
|
|
70
70
|
# @return [true, false]
|
|
71
|
-
# *Optional* If
|
|
71
|
+
# *Optional* If `false` or omitted, the recognizer will perform continuous
|
|
72
72
|
# recognition (continuing to wait for and process audio even if the user
|
|
73
73
|
# pauses speaking) until the client closes the input stream (gRPC API) or
|
|
74
74
|
# until the maximum time limit has been reached. May return multiple
|
|
75
|
-
#
|
|
75
|
+
# `StreamingRecognitionResult`s with the `is_final` flag set to `true`.
|
|
76
76
|
#
|
|
77
|
-
# If
|
|
77
|
+
# If `true`, the recognizer will detect a single spoken utterance. When it
|
|
78
78
|
# detects that the user has paused or stopped speaking, it will return an
|
|
79
|
-
#
|
|
80
|
-
# more than one
|
|
81
|
-
#
|
|
79
|
+
# `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no
|
|
80
|
+
# more than one `StreamingRecognitionResult` with the `is_final` flag set to
|
|
81
|
+
# `true`.
|
|
82
82
|
# @!attribute [rw] interim_results
|
|
83
83
|
# @return [true, false]
|
|
84
|
-
# *Optional* If
|
|
84
|
+
# *Optional* If `true`, interim results (tentative hypotheses) may be
|
|
85
85
|
# returned as they become available (these interim results are indicated with
|
|
86
|
-
# the
|
|
87
|
-
# If
|
|
86
|
+
# the `is_final=false` flag).
|
|
87
|
+
# If `false` or omitted, only `is_final=true` result(s) are returned.
|
|
88
88
|
class StreamingRecognitionConfig; end
|
|
89
89
|
|
|
90
90
|
# Provides information to the recognizer that specifies how to process the
|
|
91
91
|
# request.
|
|
92
92
|
# @!attribute [rw] encoding
|
|
93
93
|
# @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding]
|
|
94
|
-
# Encoding of audio data sent in all
|
|
95
|
-
# This field is optional for
|
|
94
|
+
# Encoding of audio data sent in all `RecognitionAudio` messages.
|
|
95
|
+
# This field is optional for `FLAC` and `WAV` audio files and required
|
|
96
96
|
# for all other audio formats. For details, see {Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
|
97
97
|
# @!attribute [rw] sample_rate_hertz
|
|
98
98
|
# @return [Integer]
|
|
99
99
|
# Sample rate in Hertz of the audio data sent in all
|
|
100
|
-
#
|
|
100
|
+
# `RecognitionAudio` messages. Valid values are: 8000-48000.
|
|
101
101
|
# 16000 is optimal. For best results, set the sampling rate of the audio
|
|
102
102
|
# source to 16000 Hz. If that's not possible, use the native sample rate of
|
|
103
103
|
# the audio source (instead of re-sampling).
|
|
104
|
-
# This field is optional for
|
|
104
|
+
# This field is optional for `FLAC` and `WAV` audio files and required
|
|
105
105
|
# for all other audio formats. For details, see {Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}.
|
|
106
106
|
# @!attribute [rw] audio_channel_count
|
|
107
107
|
# @return [Integer]
|
|
108
108
|
# *Optional* The number of channels in the input audio data.
|
|
109
109
|
# ONLY set this for MULTI-CHANNEL recognition.
|
|
110
|
-
# Valid values for LINEAR16 and FLAC are
|
|
110
|
+
# Valid values for LINEAR16 and FLAC are `1`-`8`.
|
|
111
111
|
# Valid values for OGG_OPUS are `1`-`254`.
|
|
112
|
-
# Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only
|
|
113
|
-
# If
|
|
112
|
+
# Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
|
|
113
|
+
# If `0` or omitted, defaults to one channel (mono).
|
|
114
114
|
# NOTE: We only recognize the first channel by default.
|
|
115
115
|
# To perform independent recognition on each channel set
|
|
116
116
|
# enable_separate_recognition_per_channel to 'true'.
|
|
@@ -146,31 +146,31 @@ module Google
|
|
|
146
146
|
# @!attribute [rw] max_alternatives
|
|
147
147
|
# @return [Integer]
|
|
148
148
|
# *Optional* Maximum number of recognition hypotheses to be returned.
|
|
149
|
-
# Specifically, the maximum number of
|
|
150
|
-
# within each
|
|
151
|
-
# The server may return fewer than
|
|
152
|
-
# Valid values are
|
|
149
|
+
# Specifically, the maximum number of `SpeechRecognitionAlternative` messages
|
|
150
|
+
# within each `SpeechRecognitionResult`.
|
|
151
|
+
# The server may return fewer than `max_alternatives`.
|
|
152
|
+
# Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
|
|
153
153
|
# one. If omitted, will return a maximum of one.
|
|
154
154
|
# @!attribute [rw] profanity_filter
|
|
155
155
|
# @return [true, false]
|
|
156
|
-
# *Optional* If set to
|
|
156
|
+
# *Optional* If set to `true`, the server will attempt to filter out
|
|
157
157
|
# profanities, replacing all but the initial character in each filtered word
|
|
158
|
-
# with asterisks, e.g. "f***". If set to
|
|
158
|
+
# with asterisks, e.g. "f***". If set to `false` or omitted, profanities
|
|
159
159
|
# won't be filtered out.
|
|
160
160
|
# @!attribute [rw] speech_contexts
|
|
161
161
|
# @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechContext>]
|
|
162
162
|
# *Optional* A means to provide context to assist the speech recognition.
|
|
163
163
|
# @!attribute [rw] enable_word_time_offsets
|
|
164
164
|
# @return [true, false]
|
|
165
|
-
# *Optional* If
|
|
165
|
+
# *Optional* If `true`, the top result includes a list of words and
|
|
166
166
|
# the start and end time offsets (timestamps) for those words. If
|
|
167
|
-
#
|
|
168
|
-
#
|
|
167
|
+
# `false`, no word-level time offset information is returned. The default is
|
|
168
|
+
# `false`.
|
|
169
169
|
# @!attribute [rw] enable_word_confidence
|
|
170
170
|
# @return [true, false]
|
|
171
|
-
# *Optional* If
|
|
172
|
-
# confidence for those words. If
|
|
173
|
-
# information is returned. The default is
|
|
171
|
+
# *Optional* If `true`, the top result includes a list of words and the
|
|
172
|
+
# confidence for those words. If `false`, no word-level confidence
|
|
173
|
+
# information is returned. The default is `false`.
|
|
174
174
|
# @!attribute [rw] enable_automatic_punctuation
|
|
175
175
|
# @return [true, false]
|
|
176
176
|
# *Optional* If 'true', adds punctuation to recognition result hypotheses.
|
|
@@ -235,15 +235,15 @@ module Google
|
|
|
235
235
|
# @!attribute [rw] use_enhanced
|
|
236
236
|
# @return [true, false]
|
|
237
237
|
# *Optional* Set to true to use an enhanced model for speech recognition.
|
|
238
|
-
# You must also set the
|
|
239
|
-
#
|
|
240
|
-
#
|
|
238
|
+
# You must also set the `model` field to a valid, enhanced model. If
|
|
239
|
+
# `use_enhanced` is set to true and the `model` field is not set, then
|
|
240
|
+
# `use_enhanced` is ignored. If `use_enhanced` is true and an enhanced
|
|
241
241
|
# version of the specified model does not exist, then the speech is
|
|
242
242
|
# recognized using the standard version of the specified model.
|
|
243
243
|
#
|
|
244
244
|
# Enhanced speech models require that you opt-in to the audio logging using
|
|
245
245
|
# instructions in the [alpha documentation](https://cloud.google.com/speech/data-sharing). If you set
|
|
246
|
-
#
|
|
246
|
+
# `use_enhanced` to true and you have not enabled audio logging, then you
|
|
247
247
|
# will receive an error.
|
|
248
248
|
class RecognitionConfig
|
|
249
249
|
# The encoding of the audio data sent in the request.
|
|
@@ -251,18 +251,18 @@ module Google
|
|
|
251
251
|
# All encodings support only 1 channel (mono) audio.
|
|
252
252
|
#
|
|
253
253
|
# For best results, the audio source should be captured and transmitted using
|
|
254
|
-
# a lossless encoding (
|
|
254
|
+
# a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
|
|
255
255
|
# recognition can be reduced if lossy codecs are used to capture or transmit
|
|
256
256
|
# audio, particularly if background noise is present. Lossy codecs include
|
|
257
|
-
#
|
|
257
|
+
# `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, and `SPEEX_WITH_HEADER_BYTE`.
|
|
258
258
|
#
|
|
259
|
-
# The
|
|
260
|
-
# included audio content. You can request recognition for
|
|
261
|
-
# contain either
|
|
262
|
-
# If you send
|
|
263
|
-
# your request, you do not need to specify an
|
|
259
|
+
# The `FLAC` and `WAV` audio file formats include a header that describes the
|
|
260
|
+
# included audio content. You can request recognition for `WAV` files that
|
|
261
|
+
# contain either `LINEAR16` or `MULAW` encoded audio.
|
|
262
|
+
# If you send `FLAC` or `WAV` audio file format in
|
|
263
|
+
# your request, you do not need to specify an `AudioEncoding`; the audio
|
|
264
264
|
# encoding format is determined from the file header. If you specify
|
|
265
|
-
# an
|
|
265
|
+
# an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
|
|
266
266
|
# encoding configuration must match the encoding described in the audio
|
|
267
267
|
# header; otherwise the request returns an
|
|
268
268
|
# {Google::Rpc::Code::INVALID_ARGUMENT} error code.
|
|
@@ -273,33 +273,33 @@ module Google
|
|
|
273
273
|
# Uncompressed 16-bit signed little-endian samples (Linear PCM).
|
|
274
274
|
LINEAR16 = 1
|
|
275
275
|
|
|
276
|
-
#
|
|
276
|
+
# `FLAC` (Free Lossless Audio
|
|
277
277
|
# Codec) is the recommended encoding because it is
|
|
278
278
|
# lossless--therefore recognition is not compromised--and
|
|
279
|
-
# requires only about half the bandwidth of
|
|
279
|
+
# requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
|
|
280
280
|
# encoding supports 16-bit and 24-bit samples, however, not all fields in
|
|
281
|
-
#
|
|
281
|
+
# `STREAMINFO` are supported.
|
|
282
282
|
FLAC = 2
|
|
283
283
|
|
|
284
284
|
# 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
|
|
285
285
|
MULAW = 3
|
|
286
286
|
|
|
287
|
-
# Adaptive Multi-Rate Narrowband codec.
|
|
287
|
+
# Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
|
|
288
288
|
AMR = 4
|
|
289
289
|
|
|
290
|
-
# Adaptive Multi-Rate Wideband codec.
|
|
290
|
+
# Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
|
|
291
291
|
AMR_WB = 5
|
|
292
292
|
|
|
293
293
|
# Opus encoded audio frames in Ogg container
|
|
294
294
|
# ([OggOpus](https://wiki.xiph.org/OggOpus)).
|
|
295
|
-
#
|
|
295
|
+
# `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.
|
|
296
296
|
OGG_OPUS = 6
|
|
297
297
|
|
|
298
298
|
# Although the use of lossy encodings is not recommended, if a very low
|
|
299
|
-
# bitrate encoding is required,
|
|
299
|
+
# bitrate encoding is required, `OGG_OPUS` is highly preferred over
|
|
300
300
|
# Speex encoding. The [Speex](https://speex.org/) encoding supported by
|
|
301
301
|
# Cloud Speech API has a header byte in each block, as in MIME type
|
|
302
|
-
#
|
|
302
|
+
# `audio/x-speex-with-header-byte`.
|
|
303
303
|
# It is a variant of the RTP Speex encoding defined in
|
|
304
304
|
# [RFC 5574](https://tools.ietf.org/html/rfc5574).
|
|
305
305
|
# The stream is a sequence of blocks, one block per RTP packet. Each block
|
|
@@ -307,7 +307,7 @@ module Google
|
|
|
307
307
|
# by one or more frames of Speex data, padded to an integral number of
|
|
308
308
|
# bytes (octets) as specified in RFC 5574. In other words, each RTP header
|
|
309
309
|
# is replaced with a single byte containing the block length. Only Speex
|
|
310
|
-
# wideband is supported.
|
|
310
|
+
# wideband is supported. `sample_rate_hertz` must be 16000.
|
|
311
311
|
SPEEX_WITH_HEADER_BYTE = 7
|
|
312
312
|
end
|
|
313
313
|
end
|
|
@@ -338,8 +338,8 @@ module Google
|
|
|
338
338
|
# 'Cardioid Microphone'.
|
|
339
339
|
# @!attribute [rw] original_mime_type
|
|
340
340
|
# @return [String]
|
|
341
|
-
# Mime type of the original audio file. For example
|
|
342
|
-
#
|
|
341
|
+
# Mime type of the original audio file. For example `audio/m4a`,
|
|
342
|
+
# `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
|
|
343
343
|
# A list of possible audio mime types is maintained at
|
|
344
344
|
# http://www.iana.org/assignments/media-types/media-types.xhtml#audio
|
|
345
345
|
# @!attribute [rw] obfuscated_id
|
|
@@ -455,27 +455,27 @@ module Google
|
|
|
455
455
|
# [usage limits](https://cloud.google.com/speech/limits#content).
|
|
456
456
|
class SpeechContext; end
|
|
457
457
|
|
|
458
|
-
# Contains audio data in the encoding specified in the
|
|
459
|
-
# Either
|
|
458
|
+
# Contains audio data in the encoding specified in the `RecognitionConfig`.
|
|
459
|
+
# Either `content` or `uri` must be supplied. Supplying both or neither
|
|
460
460
|
# returns {Google::Rpc::Code::INVALID_ARGUMENT}. See
|
|
461
461
|
# [audio limits](https://cloud.google.com/speech/limits#content).
|
|
462
462
|
# @!attribute [rw] content
|
|
463
463
|
# @return [String]
|
|
464
464
|
# The audio data bytes encoded as specified in
|
|
465
|
-
#
|
|
465
|
+
# `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
|
|
466
466
|
# pure binary representation, whereas JSON representations use base64.
|
|
467
467
|
# @!attribute [rw] uri
|
|
468
468
|
# @return [String]
|
|
469
469
|
# URI that points to a file that contains audio data bytes as specified in
|
|
470
|
-
#
|
|
470
|
+
# `RecognitionConfig`. Currently, only Google Cloud Storage URIs are
|
|
471
471
|
# supported, which must be specified in the following format:
|
|
472
|
-
#
|
|
472
|
+
# `gs://bucket_name/object_name` (other URI formats return
|
|
473
473
|
# {Google::Rpc::Code::INVALID_ARGUMENT}). For more information, see
|
|
474
474
|
# [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
|
|
475
475
|
class RecognitionAudio; end
|
|
476
476
|
|
|
477
|
-
# The only message returned to the client by the
|
|
478
|
-
# contains the result as zero or more sequential
|
|
477
|
+
# The only message returned to the client by the `Recognize` method. It
|
|
478
|
+
# contains the result as zero or more sequential `SpeechRecognitionResult`
|
|
479
479
|
# messages.
|
|
480
480
|
# @!attribute [rw] results
|
|
481
481
|
# @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionResult>]
|
|
@@ -483,10 +483,10 @@ module Google
|
|
|
483
483
|
# sequential portions of audio.
|
|
484
484
|
class RecognizeResponse; end
|
|
485
485
|
|
|
486
|
-
# The only message returned to the client by the
|
|
487
|
-
# It contains the result as zero or more sequential
|
|
488
|
-
# messages. It is included in the
|
|
489
|
-
# returned by the
|
|
486
|
+
# The only message returned to the client by the `LongRunningRecognize` method.
|
|
487
|
+
# It contains the result as zero or more sequential `SpeechRecognitionResult`
|
|
488
|
+
# messages. It is included in the `result.response` field of the `Operation`
|
|
489
|
+
# returned by the `GetOperation` call of the `google::longrunning::Operations`
|
|
490
490
|
# service.
|
|
491
491
|
# @!attribute [rw] results
|
|
492
492
|
# @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionResult>]
|
|
@@ -494,9 +494,9 @@ module Google
|
|
|
494
494
|
# sequential portions of audio.
|
|
495
495
|
class LongRunningRecognizeResponse; end
|
|
496
496
|
|
|
497
|
-
# Describes the progress of a long-running
|
|
498
|
-
# included in the
|
|
499
|
-
#
|
|
497
|
+
# Describes the progress of a long-running `LongRunningRecognize` call. It is
|
|
498
|
+
# included in the `metadata` field of the `Operation` returned by the
|
|
499
|
+
# `GetOperation` call of the `google::longrunning::Operations` service.
|
|
500
500
|
# @!attribute [rw] progress_percent
|
|
501
501
|
# @return [Integer]
|
|
502
502
|
# Approximate percentage of audio processed thus far. Guaranteed to be 100
|
|
@@ -509,13 +509,13 @@ module Google
|
|
|
509
509
|
# Time of the most recent processing update.
|
|
510
510
|
class LongRunningRecognizeMetadata; end
|
|
511
511
|
|
|
512
|
-
#
|
|
513
|
-
#
|
|
512
|
+
# `StreamingRecognizeResponse` is the only message returned to the client by
|
|
513
|
+
# `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
|
|
514
514
|
# messages are streamed back to the client. If there is no recognizable
|
|
515
|
-
# audio, and
|
|
515
|
+
# audio, and `single_utterance` is set to false, then no messages are streamed
|
|
516
516
|
# back to the client.
|
|
517
517
|
#
|
|
518
|
-
# Here's an example of a series of ten
|
|
518
|
+
# Here's an example of a series of ten `StreamingRecognizeResponse`s that might
|
|
519
519
|
# be returned while processing audio:
|
|
520
520
|
#
|
|
521
521
|
# 1. results { alternatives { transcript: "tube" } stability: 0.01 }
|
|
@@ -543,21 +543,21 @@ module Google
|
|
|
543
543
|
# Notes:
|
|
544
544
|
#
|
|
545
545
|
# * Only two of the above responses #4 and #7 contain final results; they are
|
|
546
|
-
# indicated by
|
|
546
|
+
# indicated by `is_final: true`. Concatenating these together generates the
|
|
547
547
|
# full transcript: "to be or not to be that is the question".
|
|
548
548
|
#
|
|
549
|
-
# * The others contain interim
|
|
550
|
-
#
|
|
549
|
+
# * The others contain interim `results`. #3 and #6 contain two interim
|
|
550
|
+
# `results`: the first portion has a high stability and is less likely to
|
|
551
551
|
# change; the second portion has a low stability and is very likely to
|
|
552
|
-
# change. A UI designer might choose to show only high stability
|
|
552
|
+
# change. A UI designer might choose to show only high stability `results`.
|
|
553
553
|
#
|
|
554
|
-
# * The specific
|
|
554
|
+
# * The specific `stability` and `confidence` values shown above are only for
|
|
555
555
|
# illustrative purposes. Actual values may vary.
|
|
556
556
|
#
|
|
557
557
|
# * In each response, only one of these fields will be set:
|
|
558
|
-
#
|
|
559
|
-
#
|
|
560
|
-
# one or more (repeated)
|
|
558
|
+
# `error`,
|
|
559
|
+
# `speech_event_type`, or
|
|
560
|
+
# one or more (repeated) `results`.
|
|
561
561
|
# @!attribute [rw] error
|
|
562
562
|
# @return [Google::Rpc::Status]
|
|
563
563
|
# Output only. If set, returns a {Google::Rpc::Status} message that
|
|
@@ -566,8 +566,8 @@ module Google
|
|
|
566
566
|
# @return [Array<Google::Cloud::Speech::V1p1beta1::StreamingRecognitionResult>]
|
|
567
567
|
# Output only. This repeated list contains zero or more results that
|
|
568
568
|
# correspond to consecutive portions of the audio currently being processed.
|
|
569
|
-
# It contains zero or one
|
|
570
|
-
# followed by zero or more
|
|
569
|
+
# It contains zero or one `is_final=true` result (the newly settled portion),
|
|
570
|
+
# followed by zero or more `is_final=false` results (the interim results).
|
|
571
571
|
# @!attribute [rw] speech_event_type
|
|
572
572
|
# @return [Google::Cloud::Speech::V1p1beta1::StreamingRecognizeResponse::SpeechEventType]
|
|
573
573
|
# Output only. Indicates the type of speech event.
|
|
@@ -583,7 +583,7 @@ module Google
|
|
|
583
583
|
# additional results). The client should stop sending additional audio
|
|
584
584
|
# data, half-close the gRPC connection, and wait for any additional results
|
|
585
585
|
# until the server closes the gRPC connection. This event is only sent if
|
|
586
|
-
#
|
|
586
|
+
# `single_utterance` was set to `true`, and is not used otherwise.
|
|
587
587
|
END_OF_SINGLE_UTTERANCE = 1
|
|
588
588
|
end
|
|
589
589
|
end
|
|
@@ -593,14 +593,14 @@ module Google
|
|
|
593
593
|
# @!attribute [rw] alternatives
|
|
594
594
|
# @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionAlternative>]
|
|
595
595
|
# Output only. May contain one or more recognition hypotheses (up to the
|
|
596
|
-
# maximum specified in
|
|
596
|
+
# maximum specified in `max_alternatives`).
|
|
597
597
|
# These alternatives are ordered in terms of accuracy, with the top (first)
|
|
598
598
|
# alternative being the most probable, as ranked by the recognizer.
|
|
599
599
|
# @!attribute [rw] is_final
|
|
600
600
|
# @return [true, false]
|
|
601
|
-
# Output only. If
|
|
602
|
-
# interim result that may change. If
|
|
603
|
-
# speech service will return this particular
|
|
601
|
+
# Output only. If `false`, this `StreamingRecognitionResult` represents an
|
|
602
|
+
# interim result that may change. If `true`, this is the final time the
|
|
603
|
+
# speech service will return this particular `StreamingRecognitionResult`,
|
|
604
604
|
# the recognizer will not return any further hypotheses for this portion of
|
|
605
605
|
# the transcript and corresponding audio.
|
|
606
606
|
# @!attribute [rw] stability
|
|
@@ -608,8 +608,8 @@ module Google
|
|
|
608
608
|
# Output only. An estimate of the likelihood that the recognizer will not
|
|
609
609
|
# change its guess about this interim result. Values range from 0.0
|
|
610
610
|
# (completely unstable) to 1.0 (completely stable).
|
|
611
|
-
# This field is only provided for interim results (
|
|
612
|
-
# The default of 0.0 is a sentinel value indicating
|
|
611
|
+
# This field is only provided for interim results (`is_final=false`).
|
|
612
|
+
# The default of 0.0 is a sentinel value indicating `stability` was not set.
|
|
613
613
|
# @!attribute [rw] channel_tag
|
|
614
614
|
# @return [Integer]
|
|
615
615
|
# For multi-channel audio, this is the channel number corresponding to the
|
|
@@ -627,7 +627,7 @@ module Google
|
|
|
627
627
|
# @!attribute [rw] alternatives
|
|
628
628
|
# @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionAlternative>]
|
|
629
629
|
# Output only. May contain one or more recognition hypotheses (up to the
|
|
630
|
-
# maximum specified in
|
|
630
|
+
# maximum specified in `max_alternatives`).
|
|
631
631
|
# These alternatives are ordered in terms of accuracy, with the top (first)
|
|
632
632
|
# alternative being the most probable, as ranked by the recognizer.
|
|
633
633
|
# @!attribute [rw] channel_tag
|
|
@@ -652,10 +652,10 @@ module Google
|
|
|
652
652
|
# Output only. The confidence estimate between 0.0 and 1.0. A higher number
|
|
653
653
|
# indicates an estimated greater likelihood that the recognized words are
|
|
654
654
|
# correct. This field is set only for the top alternative of a non-streaming
|
|
655
|
-
# result or, of a streaming result where
|
|
655
|
+
# result or, of a streaming result where `is_final=true`.
|
|
656
656
|
# This field is not guaranteed to be accurate and users should not rely on it
|
|
657
657
|
# to be always provided.
|
|
658
|
-
# The default of 0.0 is a sentinel value indicating
|
|
658
|
+
# The default of 0.0 is a sentinel value indicating `confidence` was not set.
|
|
659
659
|
# @!attribute [rw] words
|
|
660
660
|
# @return [Array<Google::Cloud::Speech::V1p1beta1::WordInfo>]
|
|
661
661
|
# Output only. A list of word-specific information for each recognized word.
|
|
@@ -668,7 +668,7 @@ module Google
|
|
|
668
668
|
# @return [Google::Protobuf::Duration]
|
|
669
669
|
# Output only. Time offset relative to the beginning of the audio,
|
|
670
670
|
# and corresponding to the start of the spoken word.
|
|
671
|
-
# This field is only set if
|
|
671
|
+
# This field is only set if `enable_word_time_offsets=true` and only
|
|
672
672
|
# in the top hypothesis.
|
|
673
673
|
# This is an experimental feature and the accuracy of the time offset can
|
|
674
674
|
# vary.
|
|
@@ -676,7 +676,7 @@ module Google
|
|
|
676
676
|
# @return [Google::Protobuf::Duration]
|
|
677
677
|
# Output only. Time offset relative to the beginning of the audio,
|
|
678
678
|
# and corresponding to the end of the spoken word.
|
|
679
|
-
# This field is only set if
|
|
679
|
+
# This field is only set if `enable_word_time_offsets=true` and only
|
|
680
680
|
# in the top hypothesis.
|
|
681
681
|
# This is an experimental feature and the accuracy of the time offset can
|
|
682
682
|
# vary.
|
|
@@ -688,10 +688,10 @@ module Google
|
|
|
688
688
|
# Output only. The confidence estimate between 0.0 and 1.0. A higher number
|
|
689
689
|
# indicates an estimated greater likelihood that the recognized words are
|
|
690
690
|
# correct. This field is set only for the top alternative of a non-streaming
|
|
691
|
-
# result or, of a streaming result where
|
|
691
|
+
# result or, of a streaming result where `is_final=true`.
|
|
692
692
|
# This field is not guaranteed to be accurate and users should not rely on it
|
|
693
693
|
# to be always provided.
|
|
694
|
-
# The default of 0.0 is a sentinel value indicating
|
|
694
|
+
# The default of 0.0 is a sentinel value indicating `confidence` was not set.
|
|
695
695
|
# @!attribute [rw] speaker_tag
|
|
696
696
|
# @return [Integer]
|
|
697
697
|
# Output only. A distinct integer value is assigned for every speaker within
|