google-cloud-speech 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google-cloud-speech.rb +6 -13
- data/lib/google/cloud/speech.rb +102 -24
- data/lib/google/cloud/speech/audio.rb +73 -11
- data/lib/google/cloud/speech/job.rb +10 -15
- data/lib/google/cloud/speech/project.rb +142 -40
- data/lib/google/cloud/speech/result.rb +124 -10
- data/lib/google/cloud/speech/service.rb +33 -14
- data/lib/google/cloud/speech/stream.rb +563 -0
- data/lib/google/cloud/speech/v1beta1.rb +2 -0
- data/lib/google/cloud/speech/v1beta1/speech_api.rb +1 -0
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83a0ff44f2fad9796bdd39f3493c05e858bb7165
|
4
|
+
data.tar.gz: f5f63a9a256c082864f7fcbcdfa7741e8f55b494
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a077cbf101500f553b7156a23b44e83086e6ef2c452edaffaac4cdedda39ef86712da8e58821f933f6b8076b08e2de5132696e57856a0f528d756816bddee2ee
|
7
|
+
data.tar.gz: cac8e868df8240737eaa8733190743afaf314353f6ad806f885f6d8fd55d5d4e19dda6de4df0325666a621a44216b551883224058ec7c45a2be2b0d0a36f2df6
|
data/lib/google-cloud-speech.rb
CHANGED
@@ -40,7 +40,7 @@ module Google
|
|
40
40
|
# * `https://www.googleapis.com/auth/speech`
|
41
41
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
42
42
|
# @param [Hash] client_config A hash of values to override the default
|
43
|
-
# behavior of the API client.
|
43
|
+
# behavior of the API client. Optional.
|
44
44
|
#
|
45
45
|
# @return [Google::Cloud::Speech::Project]
|
46
46
|
#
|
@@ -87,12 +87,12 @@ module Google
|
|
87
87
|
# * `https://www.googleapis.com/auth/speech`
|
88
88
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
89
89
|
# @param [Hash] client_config A hash of values to override the default
|
90
|
-
# behavior of the API client.
|
90
|
+
# behavior of the API client. Optional.
|
91
91
|
#
|
92
92
|
# @return [Google::Cloud::Speech::Project]
|
93
93
|
#
|
94
94
|
# @example
|
95
|
-
# require "google/cloud
|
95
|
+
# require "google/cloud"
|
96
96
|
#
|
97
97
|
# speech = Google::Cloud.speech
|
98
98
|
#
|
@@ -102,16 +102,9 @@ module Google
|
|
102
102
|
def self.speech project = nil, keyfile = nil, scope: nil, timeout: nil,
|
103
103
|
client_config: nil
|
104
104
|
require "google/cloud/speech"
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
else
|
109
|
-
credentials = Google::Cloud::Speech::Credentials.new(
|
110
|
-
keyfile, scope: scope)
|
111
|
-
end
|
112
|
-
Google::Cloud::Speech::Project.new(
|
113
|
-
Google::Cloud::Speech::Service.new(
|
114
|
-
project, credentials, timeout: timeout, client_config: client_config))
|
105
|
+
Google::Cloud::Speech.new project: project, keyfile: keyfile,
|
106
|
+
scope: scope, timeout: timeout,
|
107
|
+
client_config: client_config
|
115
108
|
end
|
116
109
|
end
|
117
110
|
end
|
data/lib/google/cloud/speech.rb
CHANGED
@@ -51,10 +51,9 @@ module Google
|
|
51
51
|
# API. You can provide a file path:
|
52
52
|
#
|
53
53
|
# ```ruby
|
54
|
-
# require "google/cloud"
|
54
|
+
# require "google/cloud/speech"
|
55
55
|
#
|
56
|
-
#
|
57
|
-
# speech = gcloud.speech
|
56
|
+
# speech = Google::Cloud::Speech.new
|
58
57
|
#
|
59
58
|
# audio = speech.audio "path/to/audio.raw",
|
60
59
|
# encoding: :raw, sample_rate: 16000
|
@@ -63,10 +62,9 @@ module Google
|
|
63
62
|
# Or, you can initialize the audio instance with a Google Cloud Storage URI:
|
64
63
|
#
|
65
64
|
# ```ruby
|
66
|
-
# require "google/cloud"
|
65
|
+
# require "google/cloud/speech"
|
67
66
|
#
|
68
|
-
#
|
69
|
-
# speech = gcloud.speech
|
67
|
+
# speech = Google::Cloud::Speech.new
|
70
68
|
#
|
71
69
|
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
72
70
|
# encoding: :raw, sample_rate: 16000
|
@@ -75,15 +73,16 @@ module Google
|
|
75
73
|
# Or, with a Google Cloud Storage File object:
|
76
74
|
#
|
77
75
|
# ```ruby
|
78
|
-
# require "google/cloud"
|
76
|
+
# require "google/cloud/storage"
|
79
77
|
#
|
80
|
-
#
|
81
|
-
# storage = gcloud.storage
|
78
|
+
# storage = Google::Cloud::Storage.new
|
82
79
|
#
|
83
80
|
# bucket = storage.bucket "bucket-name"
|
84
81
|
# file = bucket.file "path/to/audio.raw"
|
85
82
|
#
|
86
|
-
#
|
83
|
+
# require "google/cloud/speech"
|
84
|
+
#
|
85
|
+
# speech = Google::Cloud::Speech.new
|
87
86
|
#
|
88
87
|
# audio = speech.audio file, encoding: :raw, sample_rate: 16000
|
89
88
|
# ```
|
@@ -95,16 +94,15 @@ module Google
|
|
95
94
|
# recognition operation.
|
96
95
|
#
|
97
96
|
# Use {Speech::Audio#recognize} for synchronous speech recognition that
|
98
|
-
# returns {Result} objects only after all audio has been processed.
|
99
|
-
# method is limited to audio data of 1 minute or less in duration, and
|
100
|
-
# take roughly the same amount of time to process as the duration of
|
101
|
-
# supplied audio data.
|
97
|
+
# returns {Speech::Result} objects only after all audio has been processed.
|
98
|
+
# This method is limited to audio data of 1 minute or less in duration, and
|
99
|
+
# will take roughly the same amount of time to process as the duration of
|
100
|
+
# the supplied audio data.
|
102
101
|
#
|
103
102
|
# ```ruby
|
104
|
-
# require "google/cloud"
|
103
|
+
# require "google/cloud/speech"
|
105
104
|
#
|
106
|
-
#
|
107
|
-
# speech = gcloud.speech
|
105
|
+
# speech = Google::Cloud::Speech.new
|
108
106
|
#
|
109
107
|
# audio = speech.audio "path/to/audio.raw",
|
110
108
|
# encoding: :raw, sample_rate: 16000
|
@@ -112,19 +110,18 @@ module Google
|
|
112
110
|
#
|
113
111
|
# result = results.first
|
114
112
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
115
|
-
# result.confidence #=>
|
113
|
+
# result.confidence #=> 0.9826789498329163
|
116
114
|
# ```
|
117
115
|
#
|
118
116
|
# Use {Speech::Audio#recognize_job} for asynchronous speech recognition,
|
119
|
-
# in which a {Job} is returned immediately after the audio data has
|
120
|
-
# been sent. The job can be refreshed to retrieve {Result} objects
|
117
|
+
# in which a {Speech::Job} is returned immediately after the audio data has
|
118
|
+
# been sent. The job can be refreshed to retrieve {Speech::Result} objects
|
121
119
|
# once the audio data has been processed.
|
122
120
|
#
|
123
121
|
# ```ruby
|
124
|
-
# require "google/cloud"
|
122
|
+
# require "google/cloud/speech"
|
125
123
|
#
|
126
|
-
#
|
127
|
-
# speech = gcloud.speech
|
124
|
+
# speech = Google::Cloud::Speech.new
|
128
125
|
#
|
129
126
|
# audio = speech.audio "path/to/audio.raw",
|
130
127
|
# encoding: :raw, sample_rate: 16000
|
@@ -137,10 +134,91 @@ module Google
|
|
137
134
|
#
|
138
135
|
# result = results.first
|
139
136
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
140
|
-
# result.confidence #=>
|
137
|
+
# result.confidence #=> 0.9826789498329163
|
138
|
+
# ```
|
139
|
+
#
|
140
|
+
# Use {Speech::Project#stream} for streaming audio data for speech
|
141
|
+
# recognition, in which a {Speech::Stream} is returned. The stream object
|
142
|
+
# can receive results while sending audio by performing bidirectional
|
143
|
+
# streaming speech-recognition.
|
144
|
+
#
|
145
|
+
# ```ruby
|
146
|
+
# require "google/cloud/speech"
|
147
|
+
#
|
148
|
+
# speech = Google::Cloud::Speech.new
|
149
|
+
#
|
150
|
+
# audio = speech.audio "path/to/audio.raw"
|
151
|
+
#
|
152
|
+
# stream = audio.stream encoding: :raw, sample_rate: 16000
|
153
|
+
#
|
154
|
+
# # register callback for when a result is returned
|
155
|
+
# stream.on_result do |results|
|
156
|
+
# result = results.first
|
157
|
+
# result.transcript #=> "how old is the Brooklyn Bridge"
|
158
|
+
# result.confidence #=> 0.9826789498329163
|
159
|
+
# end
|
160
|
+
#
|
161
|
+
# # Stream 5 seconds of audio from the microphone
|
162
|
+
# # Actual implementation of microphone input varies by platform
|
163
|
+
# 5.times do
|
164
|
+
# stream.send MicrophoneInput.read(32000)
|
165
|
+
# end
|
166
|
+
#
|
167
|
+
# stream.stop
|
141
168
|
# ```
|
142
169
|
#
|
170
|
+
# Obtaining audio data from input sources such as a Microphone is outside
|
171
|
+
# the scope of this document.
|
172
|
+
#
|
143
173
|
module Speech
|
174
|
+
##
|
175
|
+
# Creates a new object for connecting to the Speech service.
|
176
|
+
# Each call creates a new connection.
|
177
|
+
#
|
178
|
+
# For more information on connecting to Google Cloud see the
|
179
|
+
# [Authentication
|
180
|
+
# Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
|
181
|
+
#
|
182
|
+
# @param [String] project Project identifier for the Speech service you
|
183
|
+
# are connecting to.
|
184
|
+
# @param [String, Hash] keyfile Keyfile downloaded from Google Cloud. If
|
185
|
+
# a file path, the file must be readable.
|
186
|
+
# @param [String, Array<String>] scope The OAuth 2.0 scopes controlling
|
187
|
+
# the set of resources and operations that the connection can access.
|
188
|
+
# See [Using OAuth 2.0 to Access Google
|
189
|
+
# APIs](https://developers.google.com/identity/protocols/OAuth2).
|
190
|
+
#
|
191
|
+
# The default scope is:
|
192
|
+
#
|
193
|
+
# * `https://www.googleapis.com/auth/speech`
|
194
|
+
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
195
|
+
# @param [Hash] client_config A hash of values to override the default
|
196
|
+
# behavior of the API client. Optional.
|
197
|
+
#
|
198
|
+
# @return [Google::Cloud::Speech::Project]
|
199
|
+
#
|
200
|
+
# @example
|
201
|
+
# require "google/cloud/speech"
|
202
|
+
#
|
203
|
+
# speech = Google::Cloud::Speech.new
|
204
|
+
#
|
205
|
+
# audio = speech.audio "path/to/audio.raw",
|
206
|
+
# encoding: :raw, sample_rate: 16000
|
207
|
+
#
|
208
|
+
def self.new project: nil, keyfile: nil, scope: nil, timeout: nil,
|
209
|
+
client_config: nil
|
210
|
+
project ||= Google::Cloud::Speech::Project.default_project
|
211
|
+
if keyfile.nil?
|
212
|
+
credentials = Google::Cloud::Speech::Credentials.default scope: scope
|
213
|
+
else
|
214
|
+
credentials = Google::Cloud::Speech::Credentials.new(
|
215
|
+
keyfile, scope: scope)
|
216
|
+
end
|
217
|
+
Google::Cloud::Speech::Project.new(
|
218
|
+
Google::Cloud::Speech::Service.new(
|
219
|
+
project, credentials, timeout: timeout,
|
220
|
+
client_config: client_config))
|
221
|
+
end
|
144
222
|
end
|
145
223
|
end
|
146
224
|
end
|
@@ -34,10 +34,9 @@ module Google
|
|
34
34
|
# Languages
|
35
35
|
#
|
36
36
|
# @example
|
37
|
-
# require "google/cloud"
|
37
|
+
# require "google/cloud/speech"
|
38
38
|
#
|
39
|
-
#
|
40
|
-
# speech = gcloud.speech
|
39
|
+
# speech = Google::Cloud::Speech.new
|
41
40
|
#
|
42
41
|
# audio = speech.audio "path/to/audio.raw",
|
43
42
|
# encoding: :raw, sample_rate: 16000
|
@@ -45,15 +44,80 @@ module Google
|
|
45
44
|
#
|
46
45
|
# result = results.first
|
47
46
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
48
|
-
# result.confidence #=>
|
47
|
+
# result.confidence #=> 0.9826789498329163
|
49
48
|
#
|
50
49
|
class Audio
|
51
50
|
# @private The V1beta1::RecognitionAudio object.
|
52
51
|
attr_reader :grpc
|
53
52
|
# @private The Project object.
|
54
53
|
attr_reader :speech
|
54
|
+
|
55
|
+
##
|
56
|
+
# Encoding of audio data to be recognized.
|
57
|
+
#
|
58
|
+
# Acceptable values are:
|
59
|
+
#
|
60
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
61
|
+
# (LINEAR16)
|
62
|
+
# * `flac` - The [Free Lossless Audio
|
63
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
64
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
65
|
+
# are supported. (FLAC)
|
66
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
67
|
+
# G.711 PCMU/mu-law. (MULAW)
|
68
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
69
|
+
# be 8000 Hz.) (AMR)
|
70
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
71
|
+
# be 16000 Hz.) (AMR_WB)
|
72
|
+
#
|
73
|
+
# @return [String,Symbol]
|
74
|
+
#
|
75
|
+
# @example
|
76
|
+
# require "google/cloud/speech"
|
77
|
+
#
|
78
|
+
# speech = Google::Cloud::Speech.new
|
79
|
+
#
|
80
|
+
# audio = speech.audio "path/to/audio.raw",
|
81
|
+
# encoding: :raw, sample_rate: 16000
|
82
|
+
#
|
55
83
|
attr_accessor :encoding
|
84
|
+
|
85
|
+
##
|
86
|
+
# Sample rate in Hertz of the audio data to be recognized. Valid values
|
87
|
+
# are: 8000-48000. 16000 is optimal. For best results, set the sampling
|
88
|
+
# rate of the audio source to 16000 Hz. If that's not possible, use the
|
89
|
+
# native sample rate of the audio source (instead of re-sampling).
|
90
|
+
#
|
91
|
+
# @return [Integer]
|
92
|
+
#
|
93
|
+
# @example
|
94
|
+
# require "google/cloud/speech"
|
95
|
+
#
|
96
|
+
# speech = Google::Cloud::Speech.new
|
97
|
+
#
|
98
|
+
# audio = speech.audio "path/to/audio.raw",
|
99
|
+
# encoding: :raw, sample_rate: 16000
|
100
|
+
#
|
56
101
|
attr_accessor :sample_rate
|
102
|
+
|
103
|
+
##
|
104
|
+
# The language of the supplied audio as a
|
105
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
|
106
|
+
# If not specified, the language defaults to "en-US". See [Language
|
107
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
108
|
+
# for a list of the currently supported language codes.
|
109
|
+
#
|
110
|
+
# @return [String,Symbol]
|
111
|
+
#
|
112
|
+
# @example
|
113
|
+
# require "google/cloud/speech"
|
114
|
+
#
|
115
|
+
# speech = Google::Cloud::Speech.new
|
116
|
+
#
|
117
|
+
# audio = speech.audio "path/to/audio.raw",
|
118
|
+
# encoding: :raw, sample_rate: 16000,
|
119
|
+
# language: :en
|
120
|
+
#
|
57
121
|
attr_accessor :language
|
58
122
|
|
59
123
|
##
|
@@ -108,10 +172,9 @@ module Google
|
|
108
172
|
# @return [Array<Result>] The transcribed text of audio recognized.
|
109
173
|
#
|
110
174
|
# @example
|
111
|
-
# require "google/cloud"
|
175
|
+
# require "google/cloud/speech"
|
112
176
|
#
|
113
|
-
#
|
114
|
-
# speech = gcloud.speech
|
177
|
+
# speech = Google::Cloud::Speech.new
|
115
178
|
#
|
116
179
|
# audio = speech.audio "path/to/audio.raw",
|
117
180
|
# encoding: :raw, sample_rate: 16000
|
@@ -119,7 +182,7 @@ module Google
|
|
119
182
|
#
|
120
183
|
# result = results.first
|
121
184
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
122
|
-
# result.confidence #=>
|
185
|
+
# result.confidence #=> 0.9826789498329163
|
123
186
|
#
|
124
187
|
def recognize max_alternatives: nil, profanity_filter: nil, phrases: nil
|
125
188
|
ensure_speech!
|
@@ -156,10 +219,9 @@ module Google
|
|
156
219
|
# processing of a speech-recognition operation.
|
157
220
|
#
|
158
221
|
# @example
|
159
|
-
# require "google/cloud"
|
222
|
+
# require "google/cloud/speech"
|
160
223
|
#
|
161
|
-
#
|
162
|
-
# speech = gcloud.speech
|
224
|
+
# speech = Google::Cloud::Speech.new
|
163
225
|
#
|
164
226
|
# audio = speech.audio "path/to/audio.raw",
|
165
227
|
# encoding: :raw, sample_rate: 16000
|
@@ -33,10 +33,9 @@ module Google
|
|
33
33
|
# Long-running Operation
|
34
34
|
#
|
35
35
|
# @example
|
36
|
-
# require "google/cloud"
|
36
|
+
# require "google/cloud/speech"
|
37
37
|
#
|
38
|
-
#
|
39
|
-
# speech = gcloud.speech
|
38
|
+
# speech = Google::Cloud::Speech.new
|
40
39
|
#
|
41
40
|
# job = speech.recognize_job "path/to/audio.raw",
|
42
41
|
# encoding: :raw, sample_rate: 16000
|
@@ -69,10 +68,9 @@ module Google
|
|
69
68
|
# the job is not done this will return `nil`.
|
70
69
|
#
|
71
70
|
# @example
|
72
|
-
# require "google/cloud"
|
71
|
+
# require "google/cloud/speech"
|
73
72
|
#
|
74
|
-
#
|
75
|
-
# speech = gcloud.speech
|
73
|
+
# speech = Google::Cloud::Speech.new
|
76
74
|
#
|
77
75
|
# job = speech.recognize_job "path/to/audio.raw",
|
78
76
|
# encoding: :raw, sample_rate: 16000
|
@@ -99,10 +97,9 @@ module Google
|
|
99
97
|
# @return [boolean] `true` when complete, `false` otherwise.
|
100
98
|
#
|
101
99
|
# @example
|
102
|
-
# require "google/cloud"
|
100
|
+
# require "google/cloud/speech"
|
103
101
|
#
|
104
|
-
#
|
105
|
-
# speech = gcloud.speech
|
102
|
+
# speech = Google::Cloud::Speech.new
|
106
103
|
#
|
107
104
|
# job = speech.recognize_job "path/to/audio.raw",
|
108
105
|
# encoding: :raw, sample_rate: 16000
|
@@ -118,10 +115,9 @@ module Google
|
|
118
115
|
# processing of a speech-recognition operation.
|
119
116
|
#
|
120
117
|
# @example
|
121
|
-
# require "google/cloud"
|
118
|
+
# require "google/cloud/speech"
|
122
119
|
#
|
123
|
-
#
|
124
|
-
# speech = gcloud.speech
|
120
|
+
# speech = Google::Cloud::Speech.new
|
125
121
|
#
|
126
122
|
# job = speech.recognize_job "path/to/audio.raw",
|
127
123
|
# encoding: :raw, sample_rate: 16000
|
@@ -141,10 +137,9 @@ module Google
|
|
141
137
|
# reloads will incrementally increase.
|
142
138
|
#
|
143
139
|
# @example
|
144
|
-
# require "google/cloud"
|
140
|
+
# require "google/cloud/speech"
|
145
141
|
#
|
146
|
-
#
|
147
|
-
# speech = gcloud.speech
|
142
|
+
# speech = Google::Cloud::Speech.new
|
148
143
|
#
|
149
144
|
# job = speech.recognize_job "path/to/audio.raw",
|
150
145
|
# encoding: :raw, sample_rate: 16000
|
@@ -14,11 +14,12 @@
|
|
14
14
|
|
15
15
|
|
16
16
|
require "google/cloud/errors"
|
17
|
-
require "google/cloud/core/
|
17
|
+
require "google/cloud/core/environment"
|
18
18
|
require "google/cloud/speech/service"
|
19
19
|
require "google/cloud/speech/audio"
|
20
20
|
require "google/cloud/speech/result"
|
21
21
|
require "google/cloud/speech/job"
|
22
|
+
require "google/cloud/speech/stream"
|
22
23
|
|
23
24
|
module Google
|
24
25
|
module Cloud
|
@@ -38,10 +39,9 @@ module Google
|
|
38
39
|
# See {Google::Cloud#speech}
|
39
40
|
#
|
40
41
|
# @example
|
41
|
-
# require "google/cloud"
|
42
|
+
# require "google/cloud/speech"
|
42
43
|
#
|
43
|
-
#
|
44
|
-
# speech = gcloud.speech
|
44
|
+
# speech = Google::Cloud::Speech.new
|
45
45
|
#
|
46
46
|
# audio = speech.audio "path/to/audio.raw",
|
47
47
|
# encoding: :raw, sample_rate: 16000
|
@@ -49,7 +49,7 @@ module Google
|
|
49
49
|
#
|
50
50
|
# result = results.first
|
51
51
|
# result.transcript #=> "how old is the Brooklyn Bridge"
|
52
|
-
# result.confidence #=>
|
52
|
+
# result.confidence #=> 0.9826789498329163
|
53
53
|
#
|
54
54
|
class Project
|
55
55
|
##
|
@@ -65,11 +65,12 @@ module Google
|
|
65
65
|
# The Speech project connected to.
|
66
66
|
#
|
67
67
|
# @example
|
68
|
-
# require "google/cloud"
|
68
|
+
# require "google/cloud/speech"
|
69
69
|
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
70
|
+
# speech = Google::Cloud::Speech.new(
|
71
|
+
# project: "my-project-id",
|
72
|
+
# keyfile: "/path/to/keyfile.json"
|
73
|
+
# )
|
73
74
|
#
|
74
75
|
# speech.project #=> "my-project-id"
|
75
76
|
#
|
@@ -83,7 +84,7 @@ module Google
|
|
83
84
|
ENV["SPEECH_PROJECT"] ||
|
84
85
|
ENV["GOOGLE_CLOUD_PROJECT"] ||
|
85
86
|
ENV["GCLOUD_PROJECT"] ||
|
86
|
-
Google::Cloud::Core::
|
87
|
+
Google::Cloud::Core::Environment.project_id
|
87
88
|
end
|
88
89
|
|
89
90
|
##
|
@@ -135,33 +136,32 @@ module Google
|
|
135
136
|
# @return [Audio] The audio file to be recognized.
|
136
137
|
#
|
137
138
|
# @example
|
138
|
-
# require "google/cloud"
|
139
|
+
# require "google/cloud/speech"
|
139
140
|
#
|
140
|
-
#
|
141
|
-
# speech = gcloud.speech
|
141
|
+
# speech = Google::Cloud::Speech.new
|
142
142
|
#
|
143
143
|
# audio = speech.audio "path/to/audio.raw",
|
144
144
|
# encoding: :raw, sample_rate: 16000
|
145
145
|
#
|
146
146
|
# @example With a Google Cloud Storage URI:
|
147
|
-
# require "google/cloud"
|
147
|
+
# require "google/cloud/speech"
|
148
148
|
#
|
149
|
-
#
|
150
|
-
# speech = gcloud.speech
|
149
|
+
# speech = Google::Cloud::Speech.new
|
151
150
|
#
|
152
151
|
# audio = speech.audio "gs://bucket-name/path/to/audio.raw",
|
153
152
|
# encoding: :raw, sample_rate: 16000
|
154
153
|
#
|
155
154
|
# @example With a Google Cloud Storage File object:
|
156
|
-
# require "google/cloud"
|
155
|
+
# require "google/cloud/storage"
|
157
156
|
#
|
158
|
-
#
|
159
|
-
# storage = gcloud.storage
|
157
|
+
# storage = Google::Cloud::Storage.new
|
160
158
|
#
|
161
159
|
# bucket = storage.bucket "bucket-name"
|
162
160
|
# file = bucket.file "path/to/audio.raw"
|
163
161
|
#
|
164
|
-
#
|
162
|
+
# require "google/cloud/speech"
|
163
|
+
#
|
164
|
+
# speech = Google::Cloud::Speech.new
|
165
165
|
#
|
166
166
|
# audio = speech.audio file, encoding: :raw, sample_rate: 16000
|
167
167
|
#
|
@@ -243,33 +243,32 @@ module Google
|
|
243
243
|
# @return [Array<Result>] The transcribed text of audio recognized.
|
244
244
|
#
|
245
245
|
# @example
|
246
|
-
# require "google/cloud"
|
246
|
+
# require "google/cloud/speech"
|
247
247
|
#
|
248
|
-
#
|
249
|
-
# speech = gcloud.speech
|
248
|
+
# speech = Google::Cloud::Speech.new
|
250
249
|
#
|
251
250
|
# results = speech.recognize "path/to/audio.raw",
|
252
251
|
# encoding: :raw, sample_rate: 16000
|
253
252
|
#
|
254
253
|
# @example With a Google Cloud Storage URI:
|
255
|
-
# require "google/cloud"
|
254
|
+
# require "google/cloud/speech"
|
256
255
|
#
|
257
|
-
#
|
258
|
-
# speech = gcloud.speech
|
256
|
+
# speech = Google::Cloud::Speech.new
|
259
257
|
#
|
260
258
|
# results = speech.recognize "gs://bucket-name/path/to/audio.raw",
|
261
259
|
# encoding: :raw, sample_rate: 16000
|
262
260
|
#
|
263
261
|
# @example With a Google Cloud Storage File object:
|
264
|
-
# require "google/cloud"
|
262
|
+
# require "google/cloud/storage"
|
265
263
|
#
|
266
|
-
#
|
267
|
-
# storage = gcloud.storage
|
264
|
+
# storage = Google::Cloud::Storage.new
|
268
265
|
#
|
269
266
|
# bucket = storage.bucket "bucket-name"
|
270
267
|
# file = bucket.file "path/to/audio.raw"
|
271
268
|
#
|
272
|
-
#
|
269
|
+
# require "google/cloud/speech"
|
270
|
+
#
|
271
|
+
# speech = Google::Cloud::Speech.new
|
273
272
|
#
|
274
273
|
# results = speech.recognize file, encoding: :raw,
|
275
274
|
# sample_rate: 16000,
|
@@ -342,10 +341,9 @@ module Google
|
|
342
341
|
# processing of a speech-recognition operation.
|
343
342
|
#
|
344
343
|
# @example
|
345
|
-
# require "google/cloud"
|
344
|
+
# require "google/cloud/speech"
|
346
345
|
#
|
347
|
-
#
|
348
|
-
# speech = gcloud.speech
|
346
|
+
# speech = Google::Cloud::Speech.new
|
349
347
|
#
|
350
348
|
# job = speech.recognize_job "path/to/audio.raw",
|
351
349
|
# encoding: :raw, sample_rate: 16000
|
@@ -354,10 +352,9 @@ module Google
|
|
354
352
|
# job.reload!
|
355
353
|
#
|
356
354
|
# @example With a Google Cloud Storage URI:
|
357
|
-
# require "google/cloud"
|
355
|
+
# require "google/cloud/speech"
|
358
356
|
#
|
359
|
-
#
|
360
|
-
# speech = gcloud.speech
|
357
|
+
# speech = Google::Cloud::Speech.new
|
361
358
|
#
|
362
359
|
# job = speech.recognize_job "gs://bucket-name/path/to/audio.raw",
|
363
360
|
# encoding: :raw, sample_rate: 16000
|
@@ -366,15 +363,16 @@ module Google
|
|
366
363
|
# job.reload!
|
367
364
|
#
|
368
365
|
# @example With a Google Cloud Storage File object:
|
369
|
-
# require "google/cloud"
|
366
|
+
# require "google/cloud/storage"
|
370
367
|
#
|
371
|
-
#
|
372
|
-
# storage = gcloud.storage
|
368
|
+
# storage = Google::Cloud::Storage.new
|
373
369
|
#
|
374
370
|
# bucket = storage.bucket "bucket-name"
|
375
371
|
# file = bucket.file "path/to/audio.raw"
|
376
372
|
#
|
377
|
-
#
|
373
|
+
# require "google/cloud/speech"
|
374
|
+
#
|
375
|
+
# speech = Google::Cloud::Speech.new
|
378
376
|
#
|
379
377
|
# job = speech.recognize_job file, encoding: :raw,
|
380
378
|
# sample_rate: 16000,
|
@@ -400,6 +398,110 @@ module Google
|
|
400
398
|
Job.from_grpc grpc, service
|
401
399
|
end
|
402
400
|
|
401
|
+
##
|
402
|
+
# Creates a Stream object to perform bidirectional streaming
|
403
|
+
# speech-recognition: receive results while sending audio.
|
404
|
+
#
|
405
|
+
# @see https://cloud.google.com/speech/docs/basics#streaming-recognition
|
406
|
+
# Streaming Speech API Recognition Requests
|
407
|
+
#
|
408
|
+
# @param [String, Symbol] encoding Encoding of audio data to be
|
409
|
+
# recognized. Optional.
|
410
|
+
#
|
411
|
+
# Acceptable values are:
|
412
|
+
#
|
413
|
+
# * `raw` - Uncompressed 16-bit signed little-endian samples.
|
414
|
+
# (LINEAR16)
|
415
|
+
# * `flac` - The [Free Lossless Audio
|
416
|
+
# Codec](http://flac.sourceforge.net/documentation.html) encoding.
|
417
|
+
# Only 16-bit samples are supported. Not all fields in STREAMINFO
|
418
|
+
# are supported. (FLAC)
|
419
|
+
# * `mulaw` - 8-bit samples that compand 14-bit audio samples using
|
420
|
+
# G.711 PCMU/mu-law. (MULAW)
|
421
|
+
# * `amr` - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
|
422
|
+
# be 8000 Hz.) (AMR)
|
423
|
+
# * `amr_wb` - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
|
424
|
+
# be 16000 Hz.) (AMR_WB)
|
425
|
+
#
|
426
|
+
# @param [Integer] sample_rate Sample rate in Hertz of the audio data
|
427
|
+
# to be recognized. Valid values are: 8000-48000. 16000 is optimal.
|
428
|
+
# For best results, set the sampling rate of the audio source to 16000
|
429
|
+
# Hz. If that's not possible, use the native sample rate of the audio
|
430
|
+
# source (instead of re-sampling). Optional.
|
431
|
+
# @param [String] language The language of the supplied audio as a
|
432
|
+
# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language
|
433
|
+
# code. If not specified, the language defaults to "en-US". See
|
434
|
+
# [Language
|
435
|
+
# Support](https://cloud.google.com/speech/docs/best-practices#language_support)
|
436
|
+
# for a list of the currently supported language codes. Optional.
|
437
|
+
# @param [Integer] max_alternatives The maximum number of recognition
|
438
|
+
# hypotheses to be returned. Default is 1. The service may return
|
439
|
+
# fewer. Valid values are 0-30. Optional.
|
440
|
+
# @param [Boolean] profanity_filter When `true`, the service will
|
441
|
+
# attempt to filter out profanities, replacing all but the initial
|
442
|
+
# character in each filtered word with asterisks, e.g. "f***". Default
|
443
|
+
# is `false`.
|
444
|
+
# @param [Array<String>] phrases A list of strings containing words and
|
445
|
+
# phrases "hints" so that the speech recognition is more likely to
|
446
|
+
# recognize them. See [usage
|
447
|
+
# limits](https://cloud.google.com/speech/limits#content). Optional.
|
448
|
+
# @param [Boolean] utterance When `false`, the service will perform
|
449
|
+
# continuous recognition (continuing to process audio even if the user
|
450
|
+
# pauses speaking) until the client closes the output stream (gRPC
|
451
|
+
# API) or when the maximum time limit has been reached. Default is
|
452
|
+
# `false`.
|
453
|
+
# @param [Boolean] interim When `true`, interim results (tentative
|
454
|
+
# hypotheses) may be returned as they become available. Default is
|
455
|
+
# `false`.
|
456
|
+
#
|
457
|
+
# @return [Stream] A resource that represents the streaming requests and
|
458
|
+
# responses.
|
459
|
+
#
|
460
|
+
# @example
|
461
|
+
# require "google/cloud/speech"
|
462
|
+
#
|
463
|
+
# speech = Google::Cloud::Speech.new
|
464
|
+
#
|
465
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
466
|
+
#
|
467
|
+
# # register callback for when a result is returned
|
468
|
+
# stream.on_result do |results|
|
469
|
+
# result = results.first
|
470
|
+
# puts result.transcript # "how old is the Brooklyn Bridge"
|
471
|
+
# puts result.confidence # 0.9826789498329163
|
472
|
+
# end
|
473
|
+
#
|
474
|
+
# # Stream 5 seconds of audio from the microphone
|
475
|
+
# # Actual implementation of microphone input varies by platform
|
476
|
+
# 5.times do
|
477
|
+
# stream.send MicrophoneInput.read(32000)
|
478
|
+
# end
|
479
|
+
#
|
480
|
+
# stream.stop
|
481
|
+
#
|
482
|
+
def stream encoding: nil, sample_rate: nil, language: nil,
|
483
|
+
max_alternatives: nil, profanity_filter: nil, phrases: nil,
|
484
|
+
utterance: nil, interim: nil
|
485
|
+
ensure_service!
|
486
|
+
|
487
|
+
grpc_req = V1beta1::StreamingRecognizeRequest.new(
|
488
|
+
streaming_config: V1beta1::StreamingRecognitionConfig.new(
|
489
|
+
{
|
490
|
+
config: audio_config(encoding: convert_encoding(encoding),
|
491
|
+
sample_rate: sample_rate,
|
492
|
+
language: language,
|
493
|
+
max_alternatives: max_alternatives,
|
494
|
+
profanity_filter: profanity_filter,
|
495
|
+
phrases: phrases),
|
496
|
+
single_utterance: utterance,
|
497
|
+
interim_results: interim
|
498
|
+
}.delete_if { |_, v| v.nil? }
|
499
|
+
)
|
500
|
+
)
|
501
|
+
|
502
|
+
Stream.new service, grpc_req
|
503
|
+
end
|
504
|
+
|
403
505
|
protected
|
404
506
|
|
405
507
|
def audio_config encoding: nil, sample_rate: nil, language: nil,
|