google-cloud-speech 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 83cf6bc0f6b7cda41a256f9e35d8d7b5d0cf1ecf
4
+ data.tar.gz: f5a81bafbd2102e775d329edb496457dd6ae81d8
5
+ SHA512:
6
+ metadata.gz: fed9b104e7bbd9d1e01fb34095f2f6e495c2940727c474bf2d724a1e0341f7e179f2b8be81916f8f7449322eda03b7ff86cbdce3ca1f73b17ee3d9877699d0cf
7
+ data.tar.gz: 63b312ffd750b1ae643bb7246832455f85d7c37b21058a179644c6eefa694f38800076bd2b83d946dbd541bbc94b9f82153d034e7bc96e07b26326a37036cc8a
@@ -0,0 +1,117 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ##
16
+ # This file is here to be autorequired by bundler, so that the .speech and
17
+ # #speech methods can be available, but the library and all dependencies won't
18
+ # be loaded until required and used.
19
+
20
+
21
+ gem "google-cloud-core"
22
+ require "google/cloud"
23
+
24
+ module Google
25
+ module Cloud
26
+ ##
27
+ # Creates a new object for connecting to the Speech service.
28
+ # Each call creates a new connection.
29
+ #
30
+ # For more information on connecting to Google Cloud see the [Authentication
31
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
32
+ #
33
+ # @param [String, Array<String>] scope The OAuth 2.0 scopes controlling the
34
+ # set of resources and operations that the connection can access. See
35
+ # [Using OAuth 2.0 to Access Google
36
+ # APIs](https://developers.google.com/identity/protocols/OAuth2).
37
+ #
38
+ # The default scope is:
39
+ #
40
+ # * `https://www.googleapis.com/auth/speech`
41
+ # @param [Integer] timeout Default timeout to use in requests. Optional.
42
+ # @param [Hash] client_config A hash of values to override the default
43
+ # behavior of the API client. See Google::Gax::CallSettings. Optional.
44
+ #
45
+ # @return [Google::Cloud::Speech::Project]
46
+ #
47
+ # @example
48
+ # require "google/cloud"
49
+ #
50
+ # gcloud = Google::Cloud.new
51
+ # speech = gcloud.speech
52
+ #
53
+ # audio = speech.audio "path/to/audio.raw",
54
+ # encoding: :raw, sample_rate: 16000
55
+ #
56
+ # @example The default scope can be overridden with the `scope` option:
57
+ # require "google/cloud"
58
+ #
59
+ # gcloud = Google::Cloud.new
60
+ # platform_scope = "https://www.googleapis.com/auth/cloud-platform"
61
+ # speech = gcloud.speech scope: platform_scope
62
+ #
63
+ def speech scope: nil, timeout: nil, client_config: nil
64
+ Google::Cloud.speech @project, @keyfile, scope: scope, # reuse this instance's project and keyfile
65
+ timeout: (timeout || @timeout), # an explicit timeout takes precedence over the instance-level @timeout
66
+ client_config: client_config
67
+ end
68
+
69
+ ##
70
+ # Creates a new object for connecting to the Speech service.
71
+ # Each call creates a new connection.
72
+ #
73
+ # For more information on connecting to Google Cloud see the [Authentication
74
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
75
+ #
76
+ # @param [String] project Project identifier for the Speech service you are
77
+ # connecting to.
78
+ # @param [String, Hash] keyfile Keyfile downloaded from Google Cloud. If a
79
+ # file path is given, the file must be readable.
80
+ # @param [String, Array<String>] scope The OAuth 2.0 scopes controlling the
81
+ # set of resources and operations that the connection can access. See
82
+ # [Using OAuth 2.0 to Access Google
83
+ # APIs](https://developers.google.com/identity/protocols/OAuth2).
84
+ #
85
+ # The default scope is:
86
+ #
87
+ # * `https://www.googleapis.com/auth/speech`
88
+ # @param [Integer] timeout Default timeout to use in requests. Optional.
89
+ # @param [Hash] client_config A hash of values to override the default
90
+ # behavior of the API client. See Google::Gax::CallSettings. Optional.
91
+ #
92
+ # @return [Google::Cloud::Speech::Project]
93
+ #
94
+ # @example
95
+ # require "google/cloud/speech"
96
+ #
97
+ # speech = Google::Cloud.speech
98
+ #
99
+ # audio = speech.audio "path/to/audio.raw",
100
+ # encoding: :raw, sample_rate: 16000
101
+ #
102
+ def self.speech project = nil, keyfile = nil, scope: nil, timeout: nil,
103
+ client_config: nil
104
+ require "google/cloud/speech" # required on first call, so the full library is not loaded until it is used
105
+ project ||= Google::Cloud::Speech::Project.default_project # no project given: use Project.default_project
106
+ if keyfile.nil? # no keyfile: use Credentials.default for the requested scope
107
+ credentials = Google::Cloud::Speech::Credentials.default scope: scope
108
+ else # otherwise build credentials from the supplied keyfile
109
+ credentials = Google::Cloud::Speech::Credentials.new(
110
+ keyfile, scope: scope)
111
+ end
112
+ Google::Cloud::Speech::Project.new( # return value: a Project wrapping a newly created Service
113
+ Google::Cloud::Speech::Service.new(
114
+ project, credentials, timeout: timeout, client_config: client_config))
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,146 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google-cloud-speech"
17
+ require "google/cloud/speech/project"
18
+
19
+ module Google
20
+ module Cloud
21
+ ##
22
+ # # Google Cloud Speech
23
+ #
24
+ # Google Cloud Speech API enables developers to convert audio to text by
25
+ # applying powerful neural network models in an easy-to-use API. The API
26
+ # recognizes over 80 languages and variants, to support your global user
27
+ # base. You can transcribe the text of users dictating to an application's
28
+ # microphone, enable command-and-control through voice, or transcribe audio
29
+ # files, among many other use cases. Recognize audio uploaded in the
30
+ # request, and integrate with your audio storage on Google Cloud Storage, by
31
+ # using the same technology Google uses to power its own products.
32
+ #
33
+ # For more information about Google Cloud Speech API, read the [Google Cloud
34
+ # Speech API Documentation](https://cloud.google.com/speech/docs/).
35
+ #
36
+ # The goal of google-cloud is to provide an API that is comfortable to
37
+ # Rubyists. Authentication is handled by {Google::Cloud#speech}. You can
38
+ # provide the project and credential information to connect to the Cloud
39
+ # Speech service, or if you are running on Google Compute Engine this
40
+ # configuration is taken care of for you. You can read more about the
41
+ # options for connecting in the [Authentication
42
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
43
+ #
44
+ # ## Creating audio sources
45
+ #
46
+ # You can create an audio object that holds a reference to any one of
47
+ # several types of audio data source, along with metadata such as the audio
48
+ # encoding type.
49
+ #
50
+ # Use {Speech::Project#audio} to create audio sources for the Cloud Speech
51
+ # API. You can provide a file path:
52
+ #
53
+ # ```ruby
54
+ # require "google/cloud"
55
+ #
56
+ # gcloud = Google::Cloud.new
57
+ # speech = gcloud.speech
58
+ #
59
+ # audio = speech.audio "path/to/audio.raw",
60
+ # encoding: :raw, sample_rate: 16000
61
+ # ```
62
+ #
63
+ # Or, you can initialize the audio instance with a Google Cloud Storage URI:
64
+ #
65
+ # ```ruby
66
+ # require "google/cloud"
67
+ #
68
+ # gcloud = Google::Cloud.new
69
+ # speech = gcloud.speech
70
+ #
71
+ # audio = speech.audio "gs://bucket-name/path/to/audio.raw",
72
+ # encoding: :raw, sample_rate: 16000
73
+ # ```
74
+ #
75
+ # Or, with a Google Cloud Storage File object:
76
+ #
77
+ # ```ruby
78
+ # require "google/cloud"
79
+ #
80
+ # gcloud = Google::Cloud.new
81
+ # storage = gcloud.storage
82
+ #
83
+ # bucket = storage.bucket "bucket-name"
84
+ # file = bucket.file "path/to/audio.raw"
85
+ #
86
+ # speech = gcloud.speech
87
+ #
88
+ # audio = speech.audio file, encoding: :raw, sample_rate: 16000
89
+ # ```
90
+ #
91
+ # ## Recognizing speech
92
+ #
93
+ # The instance methods on {Speech::Audio} can be used to invoke both
94
+ # synchronous and asynchronous versions of the Cloud Speech API speech
95
+ # recognition operation.
96
+ #
97
+ # Use {Speech::Audio#recognize} for synchronous speech recognition that
98
+ # returns {Result} objects only after all audio has been processed. This
99
+ # method is limited to audio data of 1 minute or less in duration, and will
100
+ # take roughly the same amount of time to process as the duration of the
101
+ # supplied audio data.
102
+ #
103
+ # ```ruby
104
+ # require "google/cloud"
105
+ #
106
+ # gcloud = Google::Cloud.new
107
+ # speech = gcloud.speech
108
+ #
109
+ # audio = speech.audio "path/to/audio.raw",
110
+ # encoding: :raw, sample_rate: 16000
111
+ # results = audio.recognize
112
+ #
113
+ # result = results.first
114
+ # result.transcript #=> "how old is the Brooklyn Bridge"
115
+ # result.confidence #=> 88.15
116
+ # ```
117
+ #
118
+ # Use {Speech::Audio#recognize_job} for asynchronous speech recognition,
119
+ # in which a {Job} is returned immediately after the audio data has
120
+ # been sent. The job can be refreshed to retrieve {Result} objects
121
+ # once the audio data has been processed.
122
+ #
123
+ # ```ruby
124
+ # require "google/cloud"
125
+ #
126
+ # gcloud = Google::Cloud.new
127
+ # speech = gcloud.speech
128
+ #
129
+ # audio = speech.audio "path/to/audio.raw",
130
+ # encoding: :raw, sample_rate: 16000
131
+ # job = audio.recognize_job
132
+ #
133
+ # job.done? #=> false
134
+ # job.reload!
135
+ # job.done? #=> true
136
+ # results = job.results
137
+ #
138
+ # result = results.first
139
+ # result.transcript #=> "how old is the Brooklyn Bridge"
140
+ # result.confidence #=> 88.15
141
+ # ```
142
+ #
143
+ module Speech # namespace only in this file; Project is loaded by the require of "google/cloud/speech/project" above
144
+ end
145
+ end
146
+ end
@@ -0,0 +1,230 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Speech
19
+ ##
20
+ # # Audio
21
+ #
22
+ # Represents a source of audio data, with related metadata such as the
23
+ # [audio encoding](https://cloud.google.com/speech/docs/basics#audio-encodings),
24
+ # [sample rate](https://cloud.google.com/speech/docs/basics#sample-rates),
25
+ # and [language](https://cloud.google.com/speech/docs/basics#languages).
26
+ #
27
+ # See {Project#audio}.
28
+ #
29
+ # @see https://cloud.google.com/speech/docs/basics#audio-encodings
30
+ # Audio Encodings
31
+ # @see https://cloud.google.com/speech/docs/basics#sample-rates
32
+ # Sample Rates
33
+ # @see https://cloud.google.com/speech/docs/basics#languages
34
+ # Languages
35
+ #
36
+ # @example
37
+ # require "google/cloud"
38
+ #
39
+ # gcloud = Google::Cloud.new
40
+ # speech = gcloud.speech
41
+ #
42
+ # audio = speech.audio "path/to/audio.raw",
43
+ # encoding: :raw, sample_rate: 16000
44
+ # results = audio.recognize
45
+ #
46
+ # result = results.first
47
+ # result.transcript #=> "how old is the Brooklyn Bridge"
48
+ # result.confidence #=> 88.15
49
+ #
50
+ class Audio
51
+ # @private The V1beta1::RecognitionAudio object.
52
+ attr_reader :grpc
53
+ # @private The Project object.
54
+ attr_reader :speech # read-only; .from_source assigns @speech directly
55
+ attr_accessor :encoding # sent as encoding: by #recognize and #recognize_job
56
+ attr_accessor :sample_rate # sent as sample_rate: by #recognize and #recognize_job
57
+ attr_accessor :language # sent as language: by #recognize and #recognize_job
58
+
59
+ ##
60
+ # @private Creates a new Audio instance.
61
+ def initialize
62
+ @grpc = V1beta1::RecognitionAudio.new # starts empty; .from_source assigns its content or uri
63
+ end
64
+
65
+ ##
66
+ # @private Whether the Audio has content.
67
+ #
68
+ def content?
69
+ @grpc.audio_source == :content # presumably audio_source names whichever of content/uri was assigned in .from_source -- confirm against RecognitionAudio
70
+ end
71
+
72
+ ##
73
+ # @private Whether the Audio is a URL.
74
+ #
75
+ def url?
76
+ @grpc.audio_source == :uri # presumably :uri is reported once .from_source has assigned grpc.uri -- confirm against RecognitionAudio
77
+ end
78
+
79
+ ##
80
+ # Performs synchronous speech recognition. Sends audio data to the
81
+ # Speech API, which performs recognition on that data, and returns
82
+ # results only after all audio has been processed. Limited to audio data
83
+ # of 1 minute or less in duration.
84
+ #
85
+ # The Speech API will take roughly the same amount of time to process
86
+ # audio data sent synchronously as the duration of the supplied audio
87
+ # data. That is, if you send audio data of 30 seconds in length, expect
88
+ # the synchronous request to take approximately 30 seconds to return
89
+ # results.
90
+ #
91
+ # @see https://cloud.google.com/speech/docs/basics#synchronous-recognition
92
+ # Synchronous Speech API Recognition
93
+ # @see https://cloud.google.com/speech/docs/basics#phrase-hints
94
+ # Phrase Hints
95
+ #
96
+ # @param [Integer] max_alternatives The maximum number of recognition
97
+ # hypotheses to be returned. The service may return fewer. Valid
98
+ # values are 0-30. Defaults to 1. Optional.
99
+ # @param [Boolean] profanity_filter When `true`, the service will
100
+ # attempt to filter out profanities, replacing all but the initial
101
+ # character in each filtered word with asterisks, e.g. "f***". Default
102
+ # is `false`.
103
+ # @param [Array<String>] phrases A list of strings containing words and
104
+ # phrases "hints" so that the speech recognition is more likely to
105
+ # recognize them. See [usage
106
+ # limits](https://cloud.google.com/speech/limits#content). Optional.
107
+ #
108
+ # @return [Array<Result>] The transcribed text of audio recognized.
109
+ #
110
+ # @example
111
+ # require "google/cloud"
112
+ #
113
+ # gcloud = Google::Cloud.new
114
+ # speech = gcloud.speech
115
+ #
116
+ # audio = speech.audio "path/to/audio.raw",
117
+ # encoding: :raw, sample_rate: 16000
118
+ # results = audio.recognize
119
+ #
120
+ # result = results.first
121
+ # result.transcript #=> "how old is the Brooklyn Bridge"
122
+ # result.confidence #=> 88.15
123
+ #
124
+ def recognize max_alternatives: nil, profanity_filter: nil, phrases: nil
125
+ ensure_speech! # raises unless @speech was set (see .from_source)
126
+
127
+ speech.recognize self, encoding: encoding, sample_rate: sample_rate, # delegate to the Project, sending this Audio's own settings
128
+ language: language,
129
+ max_alternatives: max_alternatives, # per-call options are forwarded unchanged, nil included
130
+ profanity_filter: profanity_filter,
131
+ phrases: phrases
132
+ end
133
+
134
+ ##
135
+ # Performs asynchronous speech recognition. Requests are processed
136
+ # asynchronously, meaning a Job is returned once the audio data has been
137
+ # sent, and can be refreshed to retrieve recognition results once the
138
+ # audio data has been processed.
139
+ #
140
+ # @see https://cloud.google.com/speech/docs/basics#async-responses
141
+ # Asynchronous Speech API Responses
142
+ #
143
+ # @param [Integer] max_alternatives The maximum number of recognition
144
+ # hypotheses to be returned. The service may return fewer. Valid
145
+ # values are 0-30. Defaults to 1. Optional.
146
+ # @param [Boolean] profanity_filter When `true`, the service will
147
+ # attempt to filter out profanities, replacing all but the initial
148
+ # character in each filtered word with asterisks, e.g. "f***". Default
149
+ # is `false`.
150
+ # @param [Array<String>] phrases A list of strings containing words and
151
+ # phrases "hints" so that the speech recognition is more likely to
152
+ # recognize them. See [usage
153
+ # limits](https://cloud.google.com/speech/limits#content). Optional.
154
+ #
155
+ # @return [Job] A resource that represents the long-running,
156
+ # asynchronous processing of a speech-recognition operation.
157
+ #
158
+ # @example
159
+ # require "google/cloud"
160
+ #
161
+ # gcloud = Google::Cloud.new
162
+ # speech = gcloud.speech
163
+ #
164
+ # audio = speech.audio "path/to/audio.raw",
165
+ # encoding: :raw, sample_rate: 16000
166
+ # job = audio.recognize_job
167
+ #
168
+ # job.done? #=> false
169
+ # job.reload!
170
+ # job.done? #=> true
171
+ # results = job.results
172
+ #
173
+ def recognize_job max_alternatives: nil, profanity_filter: nil,
174
+ phrases: nil
175
+ ensure_speech! # raises unless @speech was set (see .from_source)
176
+
177
+ speech.recognize_job self, encoding: encoding, # delegate to the Project, sending this Audio's own settings
178
+ sample_rate: sample_rate,
179
+ language: language,
180
+ max_alternatives: max_alternatives, # per-call options are forwarded unchanged, nil included
181
+ profanity_filter: profanity_filter,
182
+ phrases: phrases
183
+ end
184
+
185
+ ##
186
+ # @private The V1beta1::RecognitionAudio object for the Audio.
187
+ def to_grpc
188
+ @grpc # the V1beta1::RecognitionAudio created in #initialize
189
+ end
190
+
191
+ ##
192
+ # @private New Audio from a source object.
193
+ def self.from_source source, speech
194
+ audio = new
195
+ audio.instance_variable_set :@speech, speech # speech only has an attr_reader, so the ivar is set directly
196
+ if source.respond_to?(:read) && source.respond_to?(:rewind) # IO-like source: send its bytes inline
197
+ source.rewind # read from the start regardless of the current position
198
+ audio.grpc.content = source.read
199
+ return audio
200
+ end
201
+ # Convert Storage::File objects (anything responding to #to_gs_url) to the URL it returns
202
+ source = source.to_gs_url if source.respond_to? :to_gs_url
203
+ # Everything should be a string from now on
204
+ source = String source
205
+ # Create an Audio from the Google Storage URL; only the URI is stored, no data is read here
206
+ if source.start_with? "gs://"
207
+ audio.grpc.uri = source
208
+ return audio
209
+ end
210
+ # Create an audio from a file on the filesystem; an existing but unreadable file raises ArgumentError
211
+ if File.file? source
212
+ fail ArgumentError, "Cannot read #{source}" unless \
213
+ File.readable? source
214
+ audio.grpc.content = File.read source, mode: "rb"
215
+ return audio
216
+ end
217
+ fail ArgumentError, "Unable to convert #{source} to an Audio"
218
+ end
219
+
220
+ protected
221
+
222
+ ##
223
+ # Raise an error unless an active Speech Project object is available.
224
+ def ensure_speech!
225
+ fail "Must have active connection" unless @speech # fail with only a message raises RuntimeError
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end