google-cloud-speech 0.21.0 → 0.21.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +9 -0
- data/LICENSE +201 -0
- data/README.md +62 -0
- data/lib/google/cloud/speech.rb +2 -2
- data/lib/google/cloud/speech/project.rb +4 -3
- data/lib/google/cloud/speech/result.rb +3 -3
- data/lib/google/cloud/speech/stream.rb +42 -39
- data/lib/google/cloud/speech/v1beta1/doc/google/cloud/speech/v1beta1/cloud_speech.rb +377 -0
- data/lib/google/cloud/speech/v1beta1/speech_api.rb +4 -4
- data/lib/google/cloud/speech/version.rb +1 -1
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a35a3d6fd09318b755221c8c3f95ef781ebe897f
|
4
|
+
data.tar.gz: 15079061caf8289b64f42f39f5d110d3500d1250
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f55af213c9e770573b0c32f1993dfef05e1566de1b6a0e2643240a2a872319430f218e5f45243750d6bda7d00a46aad1d91bc88473290c6984ba07ca9381c3f1
|
7
|
+
data.tar.gz: ae663be1584beadca2eaeb04090fe313de27ce523099bb770c10fec85f6c8f6861a43bbc6310e19e74958b79a273f10da2033c7b0f8ace09d934f1adec304131
|
data/.yardopts
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
Apache License
|
2
|
+
Version 2.0, January 2004
|
3
|
+
http://www.apache.org/licenses/
|
4
|
+
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6
|
+
|
7
|
+
1. Definitions.
|
8
|
+
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
11
|
+
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13
|
+
the copyright owner that is granting the License.
|
14
|
+
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
16
|
+
other entities that control, are controlled by, or are under common
|
17
|
+
control with that entity. For the purposes of this definition,
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
19
|
+
direction or management of such entity, whether by contract or
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22
|
+
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24
|
+
exercising permissions granted by this License.
|
25
|
+
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
27
|
+
including but not limited to software source code, documentation
|
28
|
+
source, and configuration files.
|
29
|
+
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
31
|
+
transformation or translation of a Source form, including but
|
32
|
+
not limited to compiled object code, generated documentation,
|
33
|
+
and conversions to other media types.
|
34
|
+
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
36
|
+
Object form, made available under the License, as indicated by a
|
37
|
+
copyright notice that is included in or attached to the work
|
38
|
+
(an example is provided in the Appendix below).
|
39
|
+
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46
|
+
the Work and Derivative Works thereof.
|
47
|
+
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
49
|
+
the original version of the Work and any modifications or additions
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
61
|
+
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
64
|
+
subsequently incorporated within the Work.
|
65
|
+
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
72
|
+
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78
|
+
where such license applies only to those patent claims licensable
|
79
|
+
by such Contributor that are necessarily infringed by their
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
82
|
+
institute patent litigation against any entity (including a
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
85
|
+
or contributory patent infringement, then any patent licenses
|
86
|
+
granted to You under this License for that Work shall terminate
|
87
|
+
as of the date such litigation is filed.
|
88
|
+
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
91
|
+
modifications, and in Source or Object form, provided that You
|
92
|
+
meet the following conditions:
|
93
|
+
|
94
|
+
(a) You must give any other recipients of the Work or
|
95
|
+
Derivative Works a copy of this License; and
|
96
|
+
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
98
|
+
stating that You changed the files; and
|
99
|
+
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
102
|
+
attribution notices from the Source form of the Work,
|
103
|
+
excluding those notices that do not pertain to any part of
|
104
|
+
the Derivative Works; and
|
105
|
+
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
108
|
+
include a readable copy of the attribution notices contained
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
111
|
+
of the following places: within a NOTICE text file distributed
|
112
|
+
as part of the Derivative Works; within the Source form or
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
114
|
+
within a display generated by the Derivative Works, if and
|
115
|
+
wherever such third-party notices normally appear. The contents
|
116
|
+
of the NOTICE file are for informational purposes only and
|
117
|
+
do not modify the License. You may add Your own attribution
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
120
|
+
that such additional attribution notices cannot be construed
|
121
|
+
as modifying the License.
|
122
|
+
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
124
|
+
may provide additional or different license terms and conditions
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
128
|
+
the conditions stated in this License.
|
129
|
+
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
133
|
+
this License, without any additional terms or conditions.
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135
|
+
the terms of any separate license agreement you may have executed
|
136
|
+
with Licensor regarding such Contributions.
|
137
|
+
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
140
|
+
except as required for reasonable and customary use in describing the
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
142
|
+
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
152
|
+
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
158
|
+
incidental, or consequential damages of any character arising as a
|
159
|
+
result of this License or out of the use or inability to use the
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
162
|
+
other commercial damages or losses), even if such Contributor
|
163
|
+
has been advised of the possibility of such damages.
|
164
|
+
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168
|
+
or other liability obligations and/or rights consistent with this
|
169
|
+
License. However, in accepting such obligations, You may act only
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
174
|
+
of your accepting any such warranty or additional liability.
|
175
|
+
|
176
|
+
END OF TERMS AND CONDITIONS
|
177
|
+
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
179
|
+
|
180
|
+
To apply the Apache License to your work, attach the following
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182
|
+
replaced with your own identifying information. (Don't include
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
184
|
+
comment syntax for the file format. We also recommend that a
|
185
|
+
file or class name and description of purpose be included on the
|
186
|
+
same "printed page" as the copyright notice for easier
|
187
|
+
identification within third-party archives.
|
188
|
+
|
189
|
+
Copyright [yyyy] [name of copyright owner]
|
190
|
+
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192
|
+
you may not use this file except in compliance with the License.
|
193
|
+
You may obtain a copy of the License at
|
194
|
+
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
196
|
+
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200
|
+
See the License for the specific language governing permissions and
|
201
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# google-cloud-speech
|
2
|
+
|
3
|
+
[Google Cloud Speech API](https://cloud.google.com/speech/) ([docs](https://cloud.google.com/speech/docs)) enables developers to convert audio to text by applying powerful neural network models.
|
4
|
+
|
5
|
+
- [google-cloud-speech API documentation](http://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/master/google/cloud/speech)
|
6
|
+
- [google-cloud-speech on RubyGems](https://rubygems.org/gems/google-cloud-speech)
|
7
|
+
- [Google Cloud Speech API documentation](https://cloud.google.com/speech/docs)
|
8
|
+
|
9
|
+
## Quick Start
|
10
|
+
|
11
|
+
```sh
|
12
|
+
$ gem install google-cloud-speech
|
13
|
+
```
|
14
|
+
|
15
|
+
## Authentication
|
16
|
+
|
17
|
+
This library uses Service Account credentials to connect to Google Cloud services. When running on Compute Engine, the credentials will be discovered automatically. When running in other environments, the Service Account credentials can be specified by providing the path to the JSON file, or the JSON itself, in environment variables.
|
18
|
+
|
19
|
+
Instructions and configuration options are covered in the [Authentication Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/guides/authentication).
|
20
|
+
|
21
|
+
## Example
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require "google/cloud/speech"
|
25
|
+
|
26
|
+
speech = Google::Cloud::Speech.new
|
27
|
+
|
28
|
+
audio = speech.audio "path/to/audio.raw",
|
29
|
+
encoding: :raw, sample_rate: 16000
|
30
|
+
results = audio.recognize
|
31
|
+
|
32
|
+
result = results.first
|
33
|
+
result.transcript #=> "how old is the Brooklyn Bridge"
|
34
|
+
result.confidence #=> 0.9826789498329163
|
35
|
+
```
|
36
|
+
|
37
|
+
## Supported Ruby Versions
|
38
|
+
|
39
|
+
This library is supported on Ruby 2.0+.
|
40
|
+
|
41
|
+
## Versioning
|
42
|
+
|
43
|
+
This library follows [Semantic Versioning](http://semver.org/).
|
44
|
+
|
45
|
+
It is currently in major version zero (0.y.z), which means that anything may change at any time and the public API should not be considered stable.
|
46
|
+
|
47
|
+
## Contributing
|
48
|
+
|
49
|
+
Contributions to this library are always welcome and highly encouraged.
|
50
|
+
|
51
|
+
See the [Contributing Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/contributing) for more information on how to get started.
|
52
|
+
|
53
|
+
Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms. See [Code of Conduct](../CODE_OF_CONDUCT.md) for more information.
|
54
|
+
|
55
|
+
## License
|
56
|
+
|
57
|
+
This library is licensed under Apache 2.0. Full license text is available in [LICENSE](LICENSE).
|
58
|
+
|
59
|
+
## Support
|
60
|
+
|
61
|
+
Please [report bugs at the project on GitHub](https://github.com/GoogleCloudPlatform/google-cloud-ruby/issues).
|
62
|
+
Don't hesitate to [ask questions](http://stackoverflow.com/questions/tagged/google-cloud-ruby) about the client or APIs on [StackOverflow](http://stackoverflow.com).
|
data/lib/google/cloud/speech.rb
CHANGED
@@ -158,9 +158,9 @@ module Google
|
|
158
158
|
# result.confidence #=> 0.9826789498329163
|
159
159
|
# end
|
160
160
|
#
|
161
|
-
# # Stream 5 seconds of audio from the
|
161
|
+
# # Stream 5 seconds of audio from the microphone
|
162
162
|
# # Actual implementation of microphone input varies by platform
|
163
|
-
# 5.times
|
163
|
+
# 5.times do
|
164
164
|
# stream.send MicrophoneInput.read(32000)
|
165
165
|
# end
|
166
166
|
#
|
@@ -462,7 +462,7 @@ module Google
|
|
462
462
|
#
|
463
463
|
# speech = Google::Cloud::Speech.new
|
464
464
|
#
|
465
|
-
# stream =
|
465
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
466
466
|
#
|
467
467
|
# # register callback for when a result is returned
|
468
468
|
# stream.on_result do |results|
|
@@ -471,9 +471,9 @@ module Google
|
|
471
471
|
# puts result.confidence # 0.9826789498329163
|
472
472
|
# end
|
473
473
|
#
|
474
|
-
# # Stream 5 seconds of audio from the
|
474
|
+
# # Stream 5 seconds of audio from the microphone
|
475
475
|
# # Actual implementation of microphone input varies by platform
|
476
|
-
# 5.times
|
476
|
+
# 5.times do
|
477
477
|
# stream.send MicrophoneInput.read(32000)
|
478
478
|
# end
|
479
479
|
#
|
@@ -509,6 +509,7 @@ module Google
|
|
509
509
|
phrases: nil
|
510
510
|
context = nil
|
511
511
|
context = V1beta1::SpeechContext.new(phrases: phrases) if phrases
|
512
|
+
language = String(language) unless language.nil?
|
512
513
|
V1beta1::RecognitionConfig.new({
|
513
514
|
encoding: convert_encoding(encoding),
|
514
515
|
sample_rate: sample_rate,
|
@@ -150,7 +150,7 @@ module Google
|
|
150
150
|
#
|
151
151
|
# speech = Google::Cloud::Speech.new
|
152
152
|
#
|
153
|
-
# stream =
|
153
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
154
154
|
#
|
155
155
|
# # register callback for when an interim result is returned
|
156
156
|
# stream.on_interim do |final_results, interim_results|
|
@@ -160,9 +160,9 @@ module Google
|
|
160
160
|
# puts interim_result.stability # 0.8999
|
161
161
|
# end
|
162
162
|
#
|
163
|
-
# # Stream 5 seconds of audio from the
|
163
|
+
# # Stream 5 seconds of audio from the microphone
|
164
164
|
# # Actual implementation of microphone input varies by platform
|
165
|
-
# 5.times
|
165
|
+
# 5.times do
|
166
166
|
# stream.send MicrophoneInput.read(32000)
|
167
167
|
# end
|
168
168
|
#
|
@@ -31,7 +31,7 @@ module Google
|
|
31
31
|
#
|
32
32
|
# speech = Google::Cloud::Speech.new
|
33
33
|
#
|
34
|
-
# stream =
|
34
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
35
35
|
#
|
36
36
|
# # register callback for when a result is returned
|
37
37
|
# stream.on_result do |results|
|
@@ -40,9 +40,9 @@ module Google
|
|
40
40
|
# puts result.confidence # 0.9826789498329163
|
41
41
|
# end
|
42
42
|
#
|
43
|
-
# # Stream 5 seconds of audio from the
|
43
|
+
# # Stream 5 seconds of audio from the microphone
|
44
44
|
# # Actual implementation of microphone input varies by platform
|
45
|
-
# 5.times
|
45
|
+
# 5.times do
|
46
46
|
# stream.send MicrophoneInput.read(32000)
|
47
47
|
# end
|
48
48
|
#
|
@@ -93,6 +93,8 @@ module Google
|
|
93
93
|
#
|
94
94
|
# speech = Google::Cloud::Speech.new
|
95
95
|
#
|
96
|
+
# audio = speech.audio "path/to/audio.raw"
|
97
|
+
#
|
96
98
|
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
97
99
|
#
|
98
100
|
# # register callback for when a result is returned
|
@@ -102,9 +104,9 @@ module Google
|
|
102
104
|
# puts result.confidence # 0.9826789498329163
|
103
105
|
# end
|
104
106
|
#
|
105
|
-
# # Stream 5 seconds of audio from the
|
107
|
+
# # Stream 5 seconds of audio from the microphone
|
106
108
|
# # Actual implementation of microphone input varies by platform
|
107
|
-
# 5.times
|
109
|
+
# 5.times do
|
108
110
|
# stream.send MicrophoneInput.read(32000)
|
109
111
|
# end
|
110
112
|
#
|
@@ -151,20 +153,21 @@ module Google
|
|
151
153
|
#
|
152
154
|
# speech = Google::Cloud::Speech.new
|
153
155
|
#
|
154
|
-
# stream =
|
156
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
155
157
|
#
|
156
|
-
# # Stream 5 seconds of audio from the
|
158
|
+
# # Stream 5 seconds of audio from the microphone
|
157
159
|
# # Actual implementation of microphone input varies by platform
|
158
|
-
# 5.times
|
160
|
+
# 5.times do
|
159
161
|
# stream.send MicrophoneInput.read(32000)
|
160
162
|
# end
|
161
163
|
#
|
162
164
|
# stream.stop
|
163
165
|
#
|
164
166
|
# results = stream.results
|
165
|
-
#
|
166
|
-
#
|
167
|
-
#
|
167
|
+
# results.each do |result|
|
168
|
+
# puts result.transcript
|
169
|
+
# puts result.confidence
|
170
|
+
# end
|
168
171
|
#
|
169
172
|
def results
|
170
173
|
synchronize do
|
@@ -184,7 +187,7 @@ module Google
|
|
184
187
|
#
|
185
188
|
# speech = Google::Cloud::Speech.new
|
186
189
|
#
|
187
|
-
# stream =
|
190
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
188
191
|
#
|
189
192
|
# # register callback for when an interim result is returned
|
190
193
|
# stream.on_interim do |final_results, interim_results|
|
@@ -194,9 +197,9 @@ module Google
|
|
194
197
|
# puts interim_result.stability # 0.8999
|
195
198
|
# end
|
196
199
|
#
|
197
|
-
# # Stream 5 seconds of audio from the
|
200
|
+
# # Stream 5 seconds of audio from the microphone
|
198
201
|
# # Actual implementation of microphone input varies by platform
|
199
|
-
# 5.times
|
202
|
+
# 5.times do
|
200
203
|
# stream.send MicrophoneInput.read(32000)
|
201
204
|
# end
|
202
205
|
#
|
@@ -227,7 +230,7 @@ module Google
|
|
227
230
|
#
|
228
231
|
# speech = Google::Cloud::Speech.new
|
229
232
|
#
|
230
|
-
# stream =
|
233
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
231
234
|
#
|
232
235
|
# # register callback for when an interim result is returned
|
233
236
|
# stream.on_result do |results|
|
@@ -236,9 +239,9 @@ module Google
|
|
236
239
|
# puts result.confidence # 0.9826789498329163
|
237
240
|
# end
|
238
241
|
#
|
239
|
-
# # Stream 5 seconds of audio from the
|
242
|
+
# # Stream 5 seconds of audio from the microphone
|
240
243
|
# # Actual implementation of microphone input varies by platform
|
241
|
-
# 5.times
|
244
|
+
# 5.times do
|
242
245
|
# stream.send MicrophoneInput.read(32000)
|
243
246
|
# end
|
244
247
|
#
|
@@ -259,7 +262,7 @@ module Google
|
|
259
262
|
result!
|
260
263
|
end
|
261
264
|
|
262
|
-
# @private yields each final results as they are
|
265
|
+
# @private yields each final results as they are received
|
263
266
|
def result!
|
264
267
|
synchronize do
|
265
268
|
@callbacks[:result].each { |c| c.call results }
|
@@ -278,16 +281,16 @@ module Google
|
|
278
281
|
#
|
279
282
|
# speech = Google::Cloud::Speech.new
|
280
283
|
#
|
281
|
-
# stream =
|
284
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
282
285
|
#
|
283
286
|
# # register callback for when speech has started.
|
284
287
|
# stream.on_speech_start do
|
285
288
|
# puts "Speech has started."
|
286
289
|
# end
|
287
290
|
#
|
288
|
-
# # Stream 5 seconds of audio from the
|
291
|
+
# # Stream 5 seconds of audio from the microphone
|
289
292
|
# # Actual implementation of microphone input varies by platform
|
290
|
-
# 5.times
|
293
|
+
# 5.times do
|
291
294
|
# stream.send MicrophoneInput.read(32000)
|
292
295
|
# end
|
293
296
|
#
|
@@ -300,7 +303,7 @@ module Google
|
|
300
303
|
end
|
301
304
|
|
302
305
|
# @private returns single final result once :END_OF_UTTERANCE is
|
303
|
-
#
|
306
|
+
# received.
|
304
307
|
def speech_start!
|
305
308
|
synchronize do
|
306
309
|
@callbacks[:speech_start].each(&:call)
|
@@ -319,16 +322,16 @@ module Google
|
|
319
322
|
#
|
320
323
|
# speech = Google::Cloud::Speech.new
|
321
324
|
#
|
322
|
-
# stream =
|
325
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
323
326
|
#
|
324
327
|
# # register callback for when speech has ended.
|
325
328
|
# stream.on_speech_end do
|
326
329
|
# puts "Speech has ended."
|
327
330
|
# end
|
328
331
|
#
|
329
|
-
# # Stream 5 seconds of audio from the
|
332
|
+
# # Stream 5 seconds of audio from the microphone
|
330
333
|
# # Actual implementation of microphone input varies by platform
|
331
|
-
# 5.times
|
334
|
+
# 5.times do
|
332
335
|
# stream.send MicrophoneInput.read(32000)
|
333
336
|
# end
|
334
337
|
#
|
@@ -341,7 +344,7 @@ module Google
|
|
341
344
|
end
|
342
345
|
|
343
346
|
# @private yields single final result once :END_OF_UTTERANCE is
|
344
|
-
#
|
347
|
+
# received.
|
345
348
|
def speech_end!
|
346
349
|
synchronize do
|
347
350
|
@callbacks[:speech_end].each(&:call)
|
@@ -360,16 +363,16 @@ module Google
|
|
360
363
|
#
|
361
364
|
# speech = Google::Cloud::Speech.new
|
362
365
|
#
|
363
|
-
# stream =
|
366
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
364
367
|
#
|
365
368
|
# # register callback for when audio has ended.
|
366
369
|
# stream.on_complete do
|
367
370
|
# puts "Audio has ended."
|
368
371
|
# end
|
369
372
|
#
|
370
|
-
# # Stream 5 seconds of audio from the
|
373
|
+
# # Stream 5 seconds of audio from the microphone
|
371
374
|
# # Actual implementation of microphone input varies by platform
|
372
|
-
# 5.times
|
375
|
+
# 5.times do
|
373
376
|
# stream.send MicrophoneInput.read(32000)
|
374
377
|
# end
|
375
378
|
#
|
@@ -405,9 +408,9 @@ module Google
|
|
405
408
|
#
|
406
409
|
# speech = Google::Cloud::Speech.new
|
407
410
|
#
|
408
|
-
# stream =
|
409
|
-
#
|
410
|
-
#
|
411
|
+
# stream = speech.stream encoding: :raw,
|
412
|
+
# sample_rate: 16000,
|
413
|
+
# utterance: true
|
411
414
|
#
|
412
415
|
# # register callback for when utterance has occurred.
|
413
416
|
# stream.on_utterance do
|
@@ -415,9 +418,9 @@ module Google
|
|
415
418
|
# stream.stop
|
416
419
|
# end
|
417
420
|
#
|
418
|
-
# # Stream 5 seconds of audio from the
|
421
|
+
# # Stream 5 seconds of audio from the microphone
|
419
422
|
# # Actual implementation of microphone input varies by platform
|
420
|
-
# 5.times
|
423
|
+
# 5.times do
|
421
424
|
# stream.send MicrophoneInput.read(32000)
|
422
425
|
# end
|
423
426
|
#
|
@@ -430,7 +433,7 @@ module Google
|
|
430
433
|
end
|
431
434
|
|
432
435
|
# @private returns single final result once :END_OF_UTTERANCE is
|
433
|
-
#
|
436
|
+
# received.
|
434
437
|
def utterance!
|
435
438
|
synchronize do
|
436
439
|
@callbacks[:utterance].each(&:call)
|
@@ -438,7 +441,7 @@ module Google
|
|
438
441
|
end
|
439
442
|
|
440
443
|
##
|
441
|
-
# Register to be notified of an error
|
444
|
+
# Register to be notified of an error received during the stream.
|
442
445
|
#
|
443
446
|
# @yield [callback] The block for accessing final results.
|
444
447
|
# @yieldparam [Exception] error The error raised.
|
@@ -448,7 +451,7 @@ module Google
|
|
448
451
|
#
|
449
452
|
# speech = Google::Cloud::Speech.new
|
450
453
|
#
|
451
|
-
# stream =
|
454
|
+
# stream = speech.stream encoding: :raw, sample_rate: 16000
|
452
455
|
#
|
453
456
|
# # register callback for when an error is returned
|
454
457
|
# stream.on_error do |error|
|
@@ -456,9 +459,9 @@ module Google
|
|
456
459
|
# stream.stop
|
457
460
|
# end
|
458
461
|
#
|
459
|
-
# # Stream 5 seconds of audio from the
|
462
|
+
# # Stream 5 seconds of audio from the microphone
|
460
463
|
# # Actual implementation of microphone input varies by platform
|
461
|
-
# 5.times
|
464
|
+
# 5.times do
|
462
465
|
# stream.send MicrophoneInput.read(32000)
|
463
466
|
# end
|
464
467
|
#
|
@@ -0,0 +1,377 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Google
|
16
|
+
module Cloud
|
17
|
+
module Speech
|
18
|
+
module V1beta1
|
19
|
+
# +SyncRecognizeRequest+ is the top-level message sent by the client for
|
20
|
+
# the +SyncRecognize+ method.
|
21
|
+
# @!attribute [rw] config
|
22
|
+
# @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
|
23
|
+
# [Required] The +config+ message provides information to the recognizer
|
24
|
+
# that specifies how to process the request.
|
25
|
+
# @!attribute [rw] audio
|
26
|
+
# @return [Google::Cloud::Speech::V1beta1::RecognitionAudio]
|
27
|
+
# [Required] The audio data to be recognized.
|
28
|
+
class SyncRecognizeRequest; end
|
29
|
+
|
30
|
+
# +AsyncRecognizeRequest+ is the top-level message sent by the client for
|
31
|
+
# the +AsyncRecognize+ method.
|
32
|
+
# @!attribute [rw] config
|
33
|
+
# @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
|
34
|
+
# [Required] The +config+ message provides information to the recognizer
|
35
|
+
# that specifies how to process the request.
|
36
|
+
# @!attribute [rw] audio
|
37
|
+
# @return [Google::Cloud::Speech::V1beta1::RecognitionAudio]
|
38
|
+
# [Required] The audio data to be recognized.
|
39
|
+
class AsyncRecognizeRequest; end
|
40
|
+
|
41
|
+
# +StreamingRecognizeRequest+ is the top-level message sent by the client for
|
42
|
+
# the +StreamingRecognize+ method. Multiple +StreamingRecognizeRequest+ messages are
|
43
|
+
# sent. The first message must contain a +streaming_config+ message and must
|
44
|
+
# not contain +audio+ data. All subsequent messages must contain +audio+ data
|
45
|
+
# and must not contain a +streaming_config+ message.
|
46
|
+
# @!attribute [rw] streaming_config
|
47
|
+
# @return [Google::Cloud::Speech::V1beta1::StreamingRecognitionConfig]
|
48
|
+
# The +streaming_config+ message provides information to the recognizer
|
49
|
+
# that specifies how to process the request.
|
50
|
+
#
|
51
|
+
# The first +StreamingRecognizeRequest+ message must contain a
|
52
|
+
# +streaming_config+ message.
|
53
|
+
# @!attribute [rw] audio_content
|
54
|
+
# @return [String]
|
55
|
+
# The audio data to be recognized. Sequential chunks of audio data are sent
|
56
|
+
# in sequential +StreamingRecognizeRequest+ messages. The first
|
57
|
+
# +StreamingRecognizeRequest+ message must not contain +audio_content+ data
|
58
|
+
# and all subsequent +StreamingRecognizeRequest+ messages must contain
|
59
|
+
# +audio_content+ data. The audio bytes must be encoded as specified in
|
60
|
+
# +RecognitionConfig+. Note: as with all bytes fields, protocol buffers use a
|
61
|
+
# pure binary representation (not base64). See
|
62
|
+
# {audio limits}[https://cloud.google.com/speech/limits#content].
|
63
|
+
class StreamingRecognizeRequest; end
|
64
|
+
|
65
|
+
# The +StreamingRecognitionConfig+ message provides information to the
|
66
|
+
# recognizer that specifies how to process the request.
|
67
|
+
# @!attribute [rw] config
|
68
|
+
# @return [Google::Cloud::Speech::V1beta1::RecognitionConfig]
|
69
|
+
# [Required] The +config+ message provides information to the recognizer
|
70
|
+
# that specifies how to process the request.
|
71
|
+
# @!attribute [rw] single_utterance
|
72
|
+
# @return [true, false]
|
73
|
+
# [Optional] If +false+ or omitted, the recognizer will perform continuous
|
74
|
+
# recognition (continuing to process audio even if the user pauses speaking)
|
75
|
+
# until the client closes the output stream (gRPC API) or when the maximum
|
76
|
+
# time limit has been reached. Multiple +StreamingRecognitionResult+s with
|
77
|
+
# the +is_final+ flag set to +true+ may be returned.
|
78
|
+
#
|
79
|
+
# If +true+, the recognizer will detect a single spoken utterance. When it
|
80
|
+
# detects that the user has paused or stopped speaking, it will return an
|
81
|
+
# +END_OF_UTTERANCE+ event and cease recognition. It will return no more than
|
82
|
+
# one +StreamingRecognitionResult+ with the +is_final+ flag set to +true+.
|
83
|
+
# @!attribute [rw] interim_results
|
84
|
+
# @return [true, false]
|
85
|
+
# [Optional] If +true+, interim results (tentative hypotheses) may be
|
86
|
+
# returned as they become available (these interim results are indicated with
|
87
|
+
# the +is_final=false+ flag).
|
88
|
+
# If +false+ or omitted, only +is_final=true+ result(s) are returned.
|
89
|
+
class StreamingRecognitionConfig; end
|
90
|
+
|
91
|
+
# The +RecognitionConfig+ message provides information to the recognizer
|
92
|
+
# that specifies how to process the request.
|
93
|
+
# @!attribute [rw] encoding
|
94
|
+
# @return [Google::Cloud::Speech::V1beta1::RecognitionConfig::AudioEncoding]
|
95
|
+
# [Required] Encoding of audio data sent in all +RecognitionAudio+ messages.
|
96
|
+
# @!attribute [rw] sample_rate
|
97
|
+
# @return [Integer]
|
98
|
+
# [Required] Sample rate in Hertz of the audio data sent in all
|
99
|
+
# +RecognitionAudio+ messages. Valid values are: 8000-48000.
|
100
|
+
# 16000 is optimal. For best results, set the sampling rate of the audio
|
101
|
+
# source to 16000 Hz. If that's not possible, use the native sample rate of
|
102
|
+
# the audio source (instead of re-sampling).
|
103
|
+
# @!attribute [rw] language_code
|
104
|
+
# @return [String]
|
105
|
+
# [Optional] The language of the supplied audio as a BCP-47 language tag.
|
106
|
+
# Example: "en-GB" https://www.rfc-editor.org/rfc/bcp/bcp47.txt
|
107
|
+
# If omitted, defaults to "en-US". See
|
108
|
+
# {Language Support}[https://cloud.google.com/speech/docs/best-practices#language_support]
|
109
|
+
# for a list of the currently supported language codes.
|
110
|
+
# @!attribute [rw] max_alternatives
|
111
|
+
# @return [Integer]
|
112
|
+
# [Optional] Maximum number of recognition hypotheses to be returned.
|
113
|
+
# Specifically, the maximum number of +SpeechRecognitionAlternative+ messages
|
114
|
+
# within each +SpeechRecognitionResult+.
|
115
|
+
# The server may return fewer than +max_alternatives+.
|
116
|
+
# Valid values are +0+-+30+. A value of +0+ or +1+ will return a maximum of
|
117
|
+
# +1+. If omitted, defaults to +1+.
|
118
|
+
# @!attribute [rw] profanity_filter
|
119
|
+
# @return [true, false]
|
120
|
+
# [Optional] If set to +true+, the server will attempt to filter out
|
121
|
+
# profanities, replacing all but the initial character in each filtered word
|
122
|
+
# with asterisks, e.g. "f***". If set to +false+ or omitted, profanities
|
123
|
+
# won't be filtered out.
|
124
|
+
# @!attribute [rw] speech_context
|
125
|
+
# @return [Google::Cloud::Speech::V1beta1::SpeechContext]
|
126
|
+
# [Optional] A means to provide context to assist the speech recognition.
|
127
|
+
class RecognitionConfig
|
128
|
+
# Audio encoding of the data sent in the audio message. All encodings support
|
129
|
+
# only 1 channel (mono) audio. Only +FLAC+ includes a header that describes
|
130
|
+
# the bytes of audio that follow the header. The other encodings are raw
|
131
|
+
# audio bytes with no header.
|
132
|
+
#
|
133
|
+
# For best results, the audio source should be captured and transmitted using
|
134
|
+
# a lossless encoding (+FLAC+ or +LINEAR16+). Recognition accuracy may be
|
135
|
+
# reduced if lossy codecs (such as AMR, AMR_WB and MULAW) are used to capture
|
136
|
+
# or transmit the audio, particularly if background noise is present.
|
137
|
+
module AudioEncoding
|
138
|
+
# Not specified. Will return result Google::Rpc::Code::INVALID_ARGUMENT.
|
139
|
+
ENCODING_UNSPECIFIED = 0
|
140
|
+
|
141
|
+
# Uncompressed 16-bit signed little-endian samples (Linear PCM).
|
142
|
+
# This is the only encoding that may be used by +AsyncRecognize+.
|
143
|
+
LINEAR16 = 1
|
144
|
+
|
145
|
+
# This is the recommended encoding for +SyncRecognize+ and
|
146
|
+
# +StreamingRecognize+ because it uses lossless compression; therefore
|
147
|
+
# recognition accuracy is not compromised by a lossy codec.
|
148
|
+
#
|
149
|
+
# The stream FLAC (Free Lossless Audio Codec) encoding is specified at:
|
150
|
+
# http://flac.sourceforge.net/documentation.html.
|
151
|
+
# 16-bit and 24-bit samples are supported.
|
152
|
+
# Not all fields in STREAMINFO are supported.
|
153
|
+
FLAC = 2
|
154
|
+
|
155
|
+
# 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
|
156
|
+
MULAW = 3
|
157
|
+
|
158
|
+
# Adaptive Multi-Rate Narrowband codec. +sample_rate+ must be 8000 Hz.
|
159
|
+
AMR = 4
|
160
|
+
|
161
|
+
# Adaptive Multi-Rate Wideband codec. +sample_rate+ must be 16000 Hz.
|
162
|
+
AMR_WB = 5
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
# Provides "hints" to the speech recognizer to favor specific words and phrases
|
167
|
+
# in the results.
|
168
|
+
# @!attribute [rw] phrases
|
169
|
+
# @return [Array<String>]
|
170
|
+
# [Optional] A list of strings containing words and phrases "hints" so that
|
171
|
+
# the speech recognition is more likely to recognize them. This can be used
|
172
|
+
# to improve the accuracy for specific words and phrases, for example, if
|
173
|
+
# specific commands are typically spoken by the user. This can also be used
|
174
|
+
# to add additional words to the vocabulary of the recognizer. See
|
175
|
+
# {usage limits}[https://cloud.google.com/speech/limits#content].
|
176
|
+
class SpeechContext; end
|
177
|
+
|
178
|
+
# Contains audio data in the encoding specified in the +RecognitionConfig+.
|
179
|
+
# Either +content+ or +uri+ must be supplied. Supplying both or neither
|
180
|
+
# returns Google::Rpc::Code::INVALID_ARGUMENT. See
|
181
|
+
# {audio limits}[https://cloud.google.com/speech/limits#content].
|
182
|
+
# @!attribute [rw] content
|
183
|
+
# @return [String]
|
184
|
+
# The audio data bytes encoded as specified in
|
185
|
+
# +RecognitionConfig+. Note: as with all bytes fields, protobuffers use a
|
186
|
+
# pure binary representation, whereas JSON representations use base64.
|
187
|
+
# @!attribute [rw] uri
|
188
|
+
# @return [String]
|
189
|
+
# URI that points to a file that contains audio data bytes as specified in
|
190
|
+
# +RecognitionConfig+. Currently, only Google Cloud Storage URIs are
|
191
|
+
# supported, which must be specified in the following format:
|
192
|
+
# +gs://bucket_name/object_name+ (other URI formats return
|
193
|
+
# Google::Rpc::Code::INVALID_ARGUMENT). For more information, see
|
194
|
+
# {Request URIs}[https://cloud.google.com/storage/docs/reference-uris].
|
195
|
+
class RecognitionAudio; end
|
196
|
+
|
197
|
+
# +SyncRecognizeResponse+ is the only message returned to the client by
|
198
|
+
# +SyncRecognize+. It contains the result as zero or more sequential
|
199
|
+
# +SpeechRecognitionResult+ messages.
|
200
|
+
# @!attribute [rw] results
|
201
|
+
# @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionResult>]
|
202
|
+
# [Output-only] Sequential list of transcription results corresponding to
|
203
|
+
# sequential portions of audio.
|
204
|
+
class SyncRecognizeResponse; end
|
205
|
+
|
206
|
+
# +AsyncRecognizeResponse+ is the only message returned to the client by
|
207
|
+
# +AsyncRecognize+. It contains the result as zero or more sequential
|
208
|
+
# +SpeechRecognitionResult+ messages. It is included in the +result.response+
|
209
|
+
# field of the +Operation+ returned by the +GetOperation+ call of the
|
210
|
+
# +google::longrunning::Operations+ service.
|
211
|
+
# @!attribute [rw] results
|
212
|
+
# @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionResult>]
|
213
|
+
# [Output-only] Sequential list of transcription results corresponding to
|
214
|
+
# sequential portions of audio.
|
215
|
+
class AsyncRecognizeResponse; end
|
216
|
+
|
217
|
+
# +AsyncRecognizeMetadata+ describes the progress of a long-running
|
218
|
+
# +AsyncRecognize+ call. It is included in the +metadata+ field of the
|
219
|
+
# +Operation+ returned by the +GetOperation+ call of the
|
220
|
+
# +google::longrunning::Operations+ service.
|
221
|
+
# @!attribute [rw] progress_percent
|
222
|
+
# @return [Integer]
|
223
|
+
# Approximate percentage of audio processed thus far. Guaranteed to be 100
|
224
|
+
# when the audio is fully processed and the results are available.
|
225
|
+
# @!attribute [rw] start_time
|
226
|
+
# @return [Google::Protobuf::Timestamp]
|
227
|
+
# Time when the request was received.
|
228
|
+
# @!attribute [rw] last_update_time
|
229
|
+
# @return [Google::Protobuf::Timestamp]
|
230
|
+
# Time of the most recent processing update.
|
231
|
+
class AsyncRecognizeMetadata; end
|
232
|
+
|
233
|
+
# +StreamingRecognizeResponse+ is the only message returned to the client by
|
234
|
+
# +StreamingRecognize+. A series of one or more +StreamingRecognizeResponse+
|
235
|
+
# messages are streamed back to the client.
|
236
|
+
#
|
237
|
+
# Here's an example of a series of ten +StreamingRecognizeResponse+s that might
|
238
|
+
# be returned while processing audio:
|
239
|
+
#
|
240
|
+
# 1. endpointer_type: START_OF_SPEECH
|
241
|
+
#
|
242
|
+
# 2. results { alternatives { transcript: "tube" } stability: 0.01 }
|
243
|
+
# result_index: 0
|
244
|
+
#
|
245
|
+
# 3. results { alternatives { transcript: "to be a" } stability: 0.01 }
|
246
|
+
# result_index: 0
|
247
|
+
#
|
248
|
+
# 4. results { alternatives { transcript: "to be" } stability: 0.9 }
|
249
|
+
# results { alternatives { transcript: " or not to be" } stability: 0.01 }
|
250
|
+
# result_index: 0
|
251
|
+
#
|
252
|
+
# 5. results { alternatives { transcript: "to be or not to be"
|
253
|
+
# confidence: 0.92 }
|
254
|
+
# alternatives { transcript: "to bee or not to bee" }
|
255
|
+
# is_final: true }
|
256
|
+
# result_index: 0
|
257
|
+
#
|
258
|
+
# 6. results { alternatives { transcript: " that's" } stability: 0.01 }
|
259
|
+
# result_index: 1
|
260
|
+
#
|
261
|
+
# 7. results { alternatives { transcript: " that is" } stability: 0.9 }
|
262
|
+
# results { alternatives { transcript: " the question" } stability: 0.01 }
|
263
|
+
# result_index: 1
|
264
|
+
#
|
265
|
+
# 8. endpointer_type: END_OF_SPEECH
|
266
|
+
#
|
267
|
+
# 9. results { alternatives { transcript: " that is the question"
|
268
|
+
# confidence: 0.98 }
|
269
|
+
# alternatives { transcript: " that was the question" }
|
270
|
+
# is_final: true }
|
271
|
+
# result_index: 1
|
272
|
+
#
|
273
|
+
# 10. endpointer_type: END_OF_AUDIO
|
274
|
+
#
|
275
|
+
# Notes:
|
276
|
+
#
|
277
|
+
# - Only two of the above responses, #5 and #9, contain final results; they are
|
278
|
+
# indicated by +is_final: true+. Concatenating these together generates the
|
279
|
+
# full transcript: "to be or not to be that is the question".
|
280
|
+
#
|
281
|
+
# - The others contain interim +results+. #4 and #7 contain two interim
|
282
|
+
# +results+, the first portion has a high stability and is less likely to
|
283
|
+
# change, the second portion has a low stability and is very likely to
|
284
|
+
# change. A UI designer might choose to show only high stability +results+.
|
285
|
+
#
|
286
|
+
# - The +result_index+ indicates the portion of audio that has had final
|
287
|
+
# results returned, and is no longer being processed. For example, the
|
288
|
+
# +results+ in #6 and later correspond to the portion of audio after
|
289
|
+
# "to be or not to be".
|
290
|
+
# @!attribute [rw] error
|
291
|
+
# @return [Google::Rpc::Status]
|
292
|
+
# [Output-only] If set, returns a Google::Rpc::Status message that
|
293
|
+
# specifies the error for the operation.
|
294
|
+
# @!attribute [rw] results
|
295
|
+
# @return [Array<Google::Cloud::Speech::V1beta1::StreamingRecognitionResult>]
|
296
|
+
# [Output-only] This repeated list contains zero or more results that
|
297
|
+
# correspond to consecutive portions of the audio currently being processed.
|
298
|
+
# It contains zero or one +is_final=true+ result (the newly settled portion),
|
299
|
+
# followed by zero or more +is_final=false+ results.
|
300
|
+
# @!attribute [rw] result_index
|
301
|
+
# @return [Integer]
|
302
|
+
# [Output-only] Indicates the lowest index in the +results+ array that has
|
303
|
+
# changed. The repeated +StreamingRecognitionResult+ results overwrite past
|
304
|
+
# results at this index and higher.
|
305
|
+
# @!attribute [rw] endpointer_type
|
306
|
+
# @return [Google::Cloud::Speech::V1beta1::StreamingRecognizeResponse::EndpointerType]
|
307
|
+
# [Output-only] Indicates the type of endpointer event.
|
308
|
+
class StreamingRecognizeResponse
|
309
|
+
# Indicates the type of endpointer event.
|
310
|
+
module EndpointerType
|
311
|
+
# No endpointer event specified.
|
312
|
+
ENDPOINTER_EVENT_UNSPECIFIED = 0
|
313
|
+
|
314
|
+
# Speech has been detected in the audio stream.
|
315
|
+
START_OF_SPEECH = 1
|
316
|
+
|
317
|
+
# Speech has ceased to be detected in the audio stream.
|
318
|
+
END_OF_SPEECH = 2
|
319
|
+
|
320
|
+
# The end of the audio stream has been reached, and it is being processed.
|
321
|
+
END_OF_AUDIO = 3
|
322
|
+
|
323
|
+
# This event is only sent when +single_utterance+ is +true+. It indicates
|
324
|
+
# that the server has detected the end of the user's speech utterance and
|
325
|
+
# expects no additional speech. Therefore, the server will not process
|
326
|
+
# additional audio. The client should stop sending additional audio data.
|
327
|
+
END_OF_UTTERANCE = 4
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
# A streaming speech recognition result corresponding to a portion of the audio
|
332
|
+
# that is currently being processed.
|
333
|
+
# @!attribute [rw] alternatives
|
334
|
+
# @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionAlternative>]
|
335
|
+
# [Output-only] May contain one or more recognition hypotheses (up to the
|
336
|
+
# maximum specified in +max_alternatives+).
|
337
|
+
# @!attribute [rw] is_final
|
338
|
+
# @return [true, false]
|
339
|
+
# [Output-only] If +false+, this +StreamingRecognitionResult+ represents an
|
340
|
+
# interim result that may change. If +true+, this is the final time the
|
341
|
+
# speech service will return this particular +StreamingRecognitionResult+;
|
342
|
+
# the recognizer will not return any further hypotheses for this portion of
|
343
|
+
# the transcript and corresponding audio.
|
344
|
+
# @!attribute [rw] stability
|
345
|
+
# @return [Float]
|
346
|
+
# [Output-only] An estimate of the probability that the recognizer will not
|
347
|
+
# change its guess about this interim result. Values range from 0.0
|
348
|
+
# (completely unstable) to 1.0 (completely stable). Note that this is not the
|
349
|
+
# same as +confidence+, which estimates the probability that a recognition
|
350
|
+
# result is correct.
|
351
|
+
# This field is only provided for interim results (+is_final=false+).
|
352
|
+
# The default of 0.0 is a sentinel value indicating stability was not set.
|
353
|
+
class StreamingRecognitionResult; end
|
354
|
+
|
355
|
+
# A speech recognition result corresponding to a portion of the audio.
|
356
|
+
# @!attribute [rw] alternatives
|
357
|
+
# @return [Array<Google::Cloud::Speech::V1beta1::SpeechRecognitionAlternative>]
|
358
|
+
# [Output-only] May contain one or more recognition hypotheses (up to the
|
359
|
+
# maximum specified in +max_alternatives+).
|
360
|
+
class SpeechRecognitionResult; end
|
361
|
+
|
362
|
+
# Alternative hypotheses (a.k.a. n-best list).
|
363
|
+
# @!attribute [rw] transcript
|
364
|
+
# @return [String]
|
365
|
+
# [Output-only] Transcript text representing the words that the user spoke.
|
366
|
+
# @!attribute [rw] confidence
|
367
|
+
# @return [Float]
|
368
|
+
# [Output-only] The confidence estimate between 0.0 and 1.0. A higher number
|
369
|
+
# means the system is more confident that the recognition is correct.
|
370
|
+
# This field is typically provided only for the top hypothesis, and only for
|
371
|
+
# +is_final=true+ results.
|
372
|
+
# The default of 0.0 is a sentinel value indicating confidence was not set.
|
373
|
+
class SpeechRecognitionAlternative; end
|
374
|
+
end
|
375
|
+
end
|
376
|
+
end
|
377
|
+
end
|
@@ -158,10 +158,10 @@ module Google
|
|
158
158
|
config,
|
159
159
|
audio,
|
160
160
|
options: nil
|
161
|
-
req = Google::Cloud::Speech::V1beta1::SyncRecognizeRequest.new(
|
161
|
+
req = Google::Cloud::Speech::V1beta1::SyncRecognizeRequest.new({
|
162
162
|
config: config,
|
163
163
|
audio: audio
|
164
|
-
)
|
164
|
+
}.delete_if { |_, v| v.nil? })
|
165
165
|
@sync_recognize.call(req, options)
|
166
166
|
end
|
167
167
|
|
@@ -196,10 +196,10 @@ module Google
|
|
196
196
|
config,
|
197
197
|
audio,
|
198
198
|
options: nil
|
199
|
-
req = Google::Cloud::Speech::V1beta1::AsyncRecognizeRequest.new(
|
199
|
+
req = Google::Cloud::Speech::V1beta1::AsyncRecognizeRequest.new({
|
200
200
|
config: config,
|
201
201
|
audio: audio
|
202
|
-
)
|
202
|
+
}.delete_if { |_, v| v.nil? })
|
203
203
|
@async_recognize.call(req, options)
|
204
204
|
end
|
205
205
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-cloud-speech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.21.0
|
4
|
+
version: 0.21.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Moore
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: google-cloud-core
|
@@ -199,14 +199,14 @@ dependencies:
|
|
199
199
|
requirements:
|
200
200
|
- - "~>"
|
201
201
|
- !ruby/object:Gem::Version
|
202
|
-
version: 0.1.
|
202
|
+
version: 0.1.8
|
203
203
|
type: :development
|
204
204
|
prerelease: false
|
205
205
|
version_requirements: !ruby/object:Gem::Requirement
|
206
206
|
requirements:
|
207
207
|
- - "~>"
|
208
208
|
- !ruby/object:Gem::Version
|
209
|
-
version: 0.1.
|
209
|
+
version: 0.1.8
|
210
210
|
description: google-cloud-speech is the official library for Google Cloud Speech API.
|
211
211
|
email:
|
212
212
|
- mike@blowmage.com
|
@@ -215,6 +215,9 @@ executables: []
|
|
215
215
|
extensions: []
|
216
216
|
extra_rdoc_files: []
|
217
217
|
files:
|
218
|
+
- ".yardopts"
|
219
|
+
- LICENSE
|
220
|
+
- README.md
|
218
221
|
- lib/google-cloud-speech.rb
|
219
222
|
- lib/google/cloud/speech.rb
|
220
223
|
- lib/google/cloud/speech/audio.rb
|
@@ -227,6 +230,7 @@ files:
|
|
227
230
|
- lib/google/cloud/speech/v1beta1.rb
|
228
231
|
- lib/google/cloud/speech/v1beta1/cloud_speech_pb.rb
|
229
232
|
- lib/google/cloud/speech/v1beta1/cloud_speech_services_pb.rb
|
233
|
+
- lib/google/cloud/speech/v1beta1/doc/google/cloud/speech/v1beta1/cloud_speech.rb
|
230
234
|
- lib/google/cloud/speech/v1beta1/speech_api.rb
|
231
235
|
- lib/google/cloud/speech/v1beta1/speech_client_config.json
|
232
236
|
- lib/google/cloud/speech/version.rb
|
@@ -250,7 +254,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
250
254
|
version: '0'
|
251
255
|
requirements: []
|
252
256
|
rubyforge_project:
|
253
|
-
rubygems_version: 2.
|
257
|
+
rubygems_version: 2.4.5.1
|
254
258
|
signing_key:
|
255
259
|
specification_version: 4
|
256
260
|
summary: API Client library for Google Cloud Speech API
|