google-cloud-speech 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 83cf6bc0f6b7cda41a256f9e35d8d7b5d0cf1ecf
4
+ data.tar.gz: f5a81bafbd2102e775d329edb496457dd6ae81d8
5
+ SHA512:
6
+ metadata.gz: fed9b104e7bbd9d1e01fb34095f2f6e495c2940727c474bf2d724a1e0341f7e179f2b8be81916f8f7449322eda03b7ff86cbdce3ca1f73b17ee3d9877699d0cf
7
+ data.tar.gz: 63b312ffd750b1ae643bb7246832455f85d7c37b21058a179644c6eefa694f38800076bd2b83d946dbd541bbc94b9f82153d034e7bc96e07b26326a37036cc8a
@@ -0,0 +1,117 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ##
16
+ # This file is here to be autorequired by bundler, so that the .speech and
17
+ # #speech methods can be available, but the library and all dependencies won't
18
+ # be loaded until required and used.
19
+
20
+
21
+ gem "google-cloud-core"
22
+ require "google/cloud"
23
+
24
+ module Google
25
+ module Cloud
26
+ ##
27
+ # Creates a new object for connecting to the Speech service.
28
+ # Each call creates a new connection.
29
+ #
30
+ # For more information on connecting to Google Cloud see the [Authentication
31
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
32
+ #
33
+ # @param [String, Array<String>] scope The OAuth 2.0 scopes controlling the
34
+ # set of resources and operations that the connection can access. See
35
+ # [Using OAuth 2.0 to Access Google
36
+ # APIs](https://developers.google.com/identity/protocols/OAuth2).
37
+ #
38
+ # The default scope is:
39
+ #
40
+ # * `https://www.googleapis.com/auth/speech`
41
+ # @param [Integer] timeout Default timeout to use in requests. Optional.
42
+ # @param [Hash] client_config A hash of values to override the default
43
+ # behavior of the API client. See Google::Gax::CallSettings. Optional.
44
+ #
45
+ # @return [Google::Cloud::Speech::Project]
46
+ #
47
+ # @example
48
+ # require "google/cloud"
49
+ #
50
+ # gcloud = Google::Cloud.new
51
+ # speech = gcloud.speech
52
+ #
53
+ # audio = speech.audio "path/to/audio.raw",
54
+ # encoding: :raw, sample_rate: 16000
55
+ #
56
+ # @example The default scope can be overridden with the `scope` option:
57
+ # require "google/cloud"
58
+ #
59
+ # gcloud = Google::Cloud.new
60
+ # platform_scope = "https://www.googleapis.com/auth/cloud-platform"
61
+ # speech = gcloud.speech scope: platform_scope
62
+ #
63
+ def speech scope: nil, timeout: nil, client_config: nil
64
+ Google::Cloud.speech @project, @keyfile, scope: scope, # delegate to the module-level factory using this instance's @project/@keyfile
65
+ timeout: (timeout || @timeout), # an explicit timeout wins; otherwise fall back to this instance's @timeout
66
+ client_config: client_config
67
+ end
68
+
69
+ ##
70
+ # Creates a new object for connecting to the Speech service.
71
+ # Each call creates a new connection.
72
+ #
73
+ # For more information on connecting to Google Cloud see the [Authentication
74
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
75
+ #
76
+ # @param [String] project Project identifier for the Speech service you are
77
+ # connecting to.
78
+ # @param [String, Hash] keyfile Keyfile downloaded from Google Cloud. If
79
+ # a file path is given, the file must be readable.
80
+ # @param [String, Array<String>] scope The OAuth 2.0 scopes controlling the
81
+ # set of resources and operations that the connection can access. See
82
+ # [Using OAuth 2.0 to Access Google
83
+ # APIs](https://developers.google.com/identity/protocols/OAuth2).
84
+ #
85
+ # The default scope is:
86
+ #
87
+ # * `https://www.googleapis.com/auth/speech`
88
+ # @param [Integer] timeout Default timeout to use in requests. Optional.
89
+ # @param [Hash] client_config A hash of values to override the default
90
+ # behavior of the API client. See Google::Gax::CallSettings. Optional.
91
+ #
92
+ # @return [Google::Cloud::Speech::Project]
93
+ #
94
+ # @example
95
+ # require "google/cloud/speech"
96
+ #
97
+ # speech = Google::Cloud.speech
98
+ #
99
+ # audio = speech.audio "path/to/audio.raw",
100
+ # encoding: :raw, sample_rate: 16000
101
+ #
102
+ def self.speech project = nil, keyfile = nil, scope: nil, timeout: nil,
103
+ client_config: nil
104
+ require "google/cloud/speech" # deferred so the library is only loaded when it is actually used
105
+ project ||= Google::Cloud::Speech::Project.default_project # no project given: use the default project
106
+ if keyfile.nil? # no keyfile given: use the default credentials for the requested scope
107
+ credentials = Google::Cloud::Speech::Credentials.default scope: scope
108
+ else # otherwise build credentials from the supplied keyfile
109
+ credentials = Google::Cloud::Speech::Credentials.new(
110
+ keyfile, scope: scope)
111
+ end
112
+ Google::Cloud::Speech::Project.new( # the returned Project wraps a newly created Service
113
+ Google::Cloud::Speech::Service.new(
114
+ project, credentials, timeout: timeout, client_config: client_config))
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,146 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google-cloud-speech"
17
+ require "google/cloud/speech/project"
18
+
19
+ module Google
20
+ module Cloud
21
+ ##
22
+ # # Google Cloud Speech
23
+ #
24
+ # Google Cloud Speech API enables developers to convert audio to text by
25
+ # applying powerful neural network models in an easy-to-use API. The API
26
+ # recognizes over 80 languages and variants, to support your global user
27
+ # base. You can transcribe the text of users dictating to an application's
28
+ # microphone, enable command-and-control through voice, or transcribe audio
29
+ # files, among many other use cases. Recognize audio uploaded in the
30
+ # request, and integrate with your audio storage on Google Cloud Storage, by
31
+ # using the same technology Google uses to power its own products.
32
+ #
33
+ # For more information about Google Cloud Speech API, read the [Google Cloud
34
+ # Speech API Documentation](https://cloud.google.com/speech/docs/).
35
+ #
36
+ # The goal of google-cloud is to provide an API that is comfortable to
37
+ # Rubyists. Authentication is handled by {Google::Cloud#speech}. You can
38
+ # provide the project and credential information to connect to the Cloud
39
+ # Speech service, or if you are running on Google Compute Engine this
40
+ # configuration is taken care of for you. You can read more about the
41
+ # options for connecting in the [Authentication
42
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
43
+ #
44
+ # ## Creating audio sources
45
+ #
46
+ # You can create an audio object that holds a reference to any one of
47
+ # several types of audio data source, along with metadata such as the audio
48
+ # encoding type.
49
+ #
50
+ # Use {Speech::Project#audio} to create audio sources for the Cloud Speech
51
+ # API. You can provide a file path:
52
+ #
53
+ # ```ruby
54
+ # require "google/cloud"
55
+ #
56
+ # gcloud = Google::Cloud.new
57
+ # speech = gcloud.speech
58
+ #
59
+ # audio = speech.audio "path/to/audio.raw",
60
+ # encoding: :raw, sample_rate: 16000
61
+ # ```
62
+ #
63
+ # Or, you can initialize the audio instance with a Google Cloud Storage URI:
64
+ #
65
+ # ```ruby
66
+ # require "google/cloud"
67
+ #
68
+ # gcloud = Google::Cloud.new
69
+ # speech = gcloud.speech
70
+ #
71
+ # audio = speech.audio "gs://bucket-name/path/to/audio.raw",
72
+ # encoding: :raw, sample_rate: 16000
73
+ # ```
74
+ #
75
+ # Or, with a Google Cloud Storage File object:
76
+ #
77
+ # ```ruby
78
+ # require "google/cloud"
79
+ #
80
+ # gcloud = Google::Cloud.new
81
+ # storage = gcloud.storage
82
+ #
83
+ # bucket = storage.bucket "bucket-name"
84
+ # file = bucket.file "path/to/audio.raw"
85
+ #
86
+ # speech = gcloud.speech
87
+ #
88
+ # audio = speech.audio file, encoding: :raw, sample_rate: 16000
89
+ # ```
90
+ #
91
+ # ## Recognizing speech
92
+ #
93
+ # The instance methods on {Speech::Audio} can be used to invoke both
94
+ # synchronous and asynchronous versions of the Cloud Speech API speech
95
+ # recognition operation.
96
+ #
97
+ # Use {Speech::Audio#recognize} for synchronous speech recognition that
98
+ # returns {Result} objects only after all audio has been processed. This
99
+ # method is limited to audio data of 1 minute or less in duration, and will
100
+ # take roughly the same amount of time to process as the duration of the
101
+ # supplied audio data.
102
+ #
103
+ # ```ruby
104
+ # require "google/cloud"
105
+ #
106
+ # gcloud = Google::Cloud.new
107
+ # speech = gcloud.speech
108
+ #
109
+ # audio = speech.audio "path/to/audio.raw",
110
+ # encoding: :raw, sample_rate: 16000
111
+ # results = audio.recognize
112
+ #
113
+ # result = results.first
114
+ # result.transcript #=> "how old is the Brooklyn Bridge"
115
+ # result.confidence #=> 88.15
116
+ # ```
117
+ #
118
+ # Use {Speech::Audio#recognize_job} for asynchronous speech recognition,
119
+ # in which a {Job} is returned immediately after the audio data has
120
+ # been sent. The job can be refreshed to retrieve {Result} objects
121
+ # once the audio data has been processed.
122
+ #
123
+ # ```ruby
124
+ # require "google/cloud"
125
+ #
126
+ # gcloud = Google::Cloud.new
127
+ # speech = gcloud.speech
128
+ #
129
+ # audio = speech.audio "path/to/audio.raw",
130
+ # encoding: :raw, sample_rate: 16000
131
+ # job = audio.recognize_job
132
+ #
133
+ # job.done? #=> false
134
+ # job.reload!
135
+ # job.done? #=> true
136
+ # results = job.results
137
+ #
138
+ # result = results.first
139
+ # result.transcript #=> "how old is the Brooklyn Bridge"
140
+ # result.confidence #=> 88.15
141
+ # ```
142
+ #
143
+ module Speech
144
+ end
145
+ end
146
+ end
@@ -0,0 +1,230 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Speech
19
+ ##
20
+ # # Audio
21
+ #
22
+ # Represents a source of audio data, with related metadata such as the
23
+ # [audio encoding](https://cloud.google.com/speech/docs/basics#audio-encodings),
24
+ # [sample rate](https://cloud.google.com/speech/docs/basics#sample-rates),
25
+ # and [language](https://cloud.google.com/speech/docs/basics#languages).
26
+ #
27
+ # See {Project#audio}.
28
+ #
29
+ # @see https://cloud.google.com/speech/docs/basics#audio-encodings
30
+ # Audio Encodings
31
+ # @see https://cloud.google.com/speech/docs/basics#sample-rates
32
+ # Sample Rates
33
+ # @see https://cloud.google.com/speech/docs/basics#languages
34
+ # Languages
35
+ #
36
+ # @example
37
+ # require "google/cloud"
38
+ #
39
+ # gcloud = Google::Cloud.new
40
+ # speech = gcloud.speech
41
+ #
42
+ # audio = speech.audio "path/to/audio.raw",
43
+ # encoding: :raw, sample_rate: 16000
44
+ # results = audio.recognize
45
+ #
46
+ # result = results.first
47
+ # result.transcript #=> "how old is the Brooklyn Bridge"
48
+ # result.confidence #=> 88.15
49
+ #
50
+ class Audio
51
+ # @private The V1beta1::RecognitionAudio object.
52
+ attr_reader :grpc
53
+ # @private The Project object.
54
+ attr_reader :speech
55
+ attr_accessor :encoding # forwarded as `encoding:` by #recognize and #recognize_job
56
+ attr_accessor :sample_rate # forwarded as `sample_rate:` by #recognize and #recognize_job
57
+ attr_accessor :language # forwarded as `language:` by #recognize and #recognize_job
58
+
59
+ ##
60
+ # @private Creates a new Audio instance backed by a new V1beta1::RecognitionAudio.
61
+ def initialize
62
+ @grpc = V1beta1::RecognitionAudio.new
63
+ end
64
+
65
+ ##
66
+ # @private Whether the Audio holds inline content (audio_source is :content).
67
+ #
68
+ def content?
69
+ @grpc.audio_source == :content
70
+ end
71
+
72
+ ##
73
+ # @private Whether the Audio references a URI (audio_source is :uri).
74
+ #
75
+ def url?
76
+ @grpc.audio_source == :uri
77
+ end
78
+
79
+ ##
80
+ # Performs synchronous speech recognition. Sends audio data to the
81
+ # Speech API, which performs recognition on that data, and returns
82
+ # results only after all audio has been processed. Limited to audio data
83
+ # of 1 minute or less in duration.
84
+ #
85
+ # The Speech API will take roughly the same amount of time to process
86
+ # audio data sent synchronously as the duration of the supplied audio
87
+ # data. That is, if you send audio data of 30 seconds in length, expect
88
+ # the synchronous request to take approximately 30 seconds to return
89
+ # results.
90
+ #
91
+ # @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
92
+ # Synchronous Speech API Recognition
93
+ # @see https://cloud.google.com/speech/docs/basics#phrase-hints
94
+ # Phrase Hints
95
+ #
96
+ # @param [Integer] max_alternatives The maximum number of recognition
97
+ # hypotheses to be returned. The service may return fewer. Valid
98
+ # values are 0-30. Defaults to 1. Optional.
99
+ # @param [Boolean] profanity_filter When `true`, the service will
100
+ # attempt to filter out profanities, replacing all but the initial
101
+ # character in each filtered word with asterisks, e.g. "f***". Default
102
+ # is `false`.
103
+ # @param [Array<String>] phrases A list of strings containing words and
104
+ # phrases "hints" so that the speech recognition is more likely to
105
+ # recognize them. See [usage
106
+ # limits](https://cloud.google.com/speech/limits#content). Optional.
107
+ #
108
+ # @return [Array<Result>] The transcribed text of audio recognized.
109
+ #
110
+ # @example
111
+ # require "google/cloud"
112
+ #
113
+ # gcloud = Google::Cloud.new
114
+ # speech = gcloud.speech
115
+ #
116
+ # audio = speech.audio "path/to/audio.raw",
117
+ # encoding: :raw, sample_rate: 16000
118
+ # results = audio.recognize
119
+ #
120
+ # result = results.first
121
+ # result.transcript #=> "how old is the Brooklyn Bridge"
122
+ # result.confidence #=> 88.15
123
+ #
124
+ def recognize max_alternatives: nil, profanity_filter: nil, phrases: nil
125
+ ensure_speech!
126
+
127
+ speech.recognize self, encoding: encoding, sample_rate: sample_rate,
128
+ language: language,
129
+ max_alternatives: max_alternatives,
130
+ profanity_filter: profanity_filter,
131
+ phrases: phrases
132
+ end
133
+
134
+ ##
135
+ # Performs asynchronous speech recognition. Requests are processed
136
+ # asynchronously, meaning a Job is returned once the audio data has been
137
+ # sent, and can be refreshed to retrieve recognition results once the
138
+ # audio data has been processed.
139
+ #
140
+ # @see https://cloud.google.com/speech/docs/basics#async-responses
141
+ # Asynchronous Speech API Responses
142
+ #
143
+ # @param [Integer] max_alternatives The maximum number of recognition
144
+ # hypotheses to be returned. The service may return fewer. Valid
145
+ # values are 0-30. Defaults to 1. Optional.
146
+ # @param [Boolean] profanity_filter When `true`, the service will
147
+ # attempt to filter out profanities, replacing all but the initial
148
+ # character in each filtered word with asterisks, e.g. "f***". Default
149
+ # is `false`.
150
+ # @param [Array<String>] phrases A list of strings containing words and
151
+ # phrases "hints" so that the speech recognition is more likely to
152
+ # recognize them. See [usage
153
+ # limits](https://cloud.google.com/speech/limits#content). Optional.
154
+ #
155
+ # @return [Job] A resource that represents the long-running, asynchronous
156
+ # processing of a speech-recognition operation.
157
+ #
158
+ # @example
159
+ # require "google/cloud"
160
+ #
161
+ # gcloud = Google::Cloud.new
162
+ # speech = gcloud.speech
163
+ #
164
+ # audio = speech.audio "path/to/audio.raw",
165
+ # encoding: :raw, sample_rate: 16000
166
+ # job = audio.recognize_job
167
+ #
168
+ # job.done? #=> false
169
+ # job.reload!
170
+ # job.done? #=> true
171
+ # results = job.results
172
+ #
173
+ def recognize_job max_alternatives: nil, profanity_filter: nil,
174
+ phrases: nil
175
+ ensure_speech!
176
+
177
+ speech.recognize_job self, encoding: encoding,
178
+ sample_rate: sample_rate,
179
+ language: language,
180
+ max_alternatives: max_alternatives,
181
+ profanity_filter: profanity_filter,
182
+ phrases: phrases
183
+ end
184
+
185
+ ##
186
+ # @private The underlying V1beta1::RecognitionAudio object for the Audio.
187
+ def to_grpc
188
+ @grpc
189
+ end
190
+
191
+ ##
192
+ # @private New Audio from a source object: an IO-like object, an object responding to #to_gs_url, a "gs://" URL, or a local file path. Raises ArgumentError otherwise.
193
+ def self.from_source source, speech
194
+ audio = new
195
+ audio.instance_variable_set :@speech, speech
196
+ if source.respond_to?(:read) && source.respond_to?(:rewind) # IO-like source: read it from the beginning
197
+ source.rewind
198
+ audio.grpc.content = source.read
199
+ return audio
200
+ end
201
+ # Convert Storage::File objects to the URL
202
+ source = source.to_gs_url if source.respond_to? :to_gs_url
203
+ # Everything should be a string from now on
204
+ source = String source
205
+ # Create an Audio from the Google Storage URL
206
+ if source.start_with? "gs://"
207
+ audio.grpc.uri = source
208
+ return audio
209
+ end
210
+ # Create an audio from a file on the filesystem
211
+ if File.file? source
212
+ fail ArgumentError, "Cannot read #{source}" unless \
213
+ File.readable? source
214
+ audio.grpc.content = File.read source, mode: "rb"
215
+ return audio
216
+ end
217
+ fail ArgumentError, "Unable to convert #{source} to an Audio"
218
+ end
219
+
220
+ protected
221
+
222
+ ##
223
+ # Raises a RuntimeError unless an active Speech Project object (@speech) is available.
224
+ def ensure_speech!
225
+ fail "Must have active connection" unless @speech
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end