google-cloud-speech 0.34.1 → 0.35.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 363ea2b2a855f3a5721f14eaba4ebaea0c474912996d8777327a5593d58fb25a
4
- data.tar.gz: 70445356ae04fe3a805efc76269d17269275d0ae4bc0e19ecc5915a63a2a5abe
3
+ metadata.gz: 5a486e59964640a724d7a70449004707ce610868a9d6776783758b8cefe1d01b
4
+ data.tar.gz: 0db54dd44e394919bc4f20d41f0570d52c8f543a136762e34700ef7daed98730
5
5
  SHA512:
6
- metadata.gz: e9203cea2fc78d9d8cf7dd3d98ecad2e34c9e8e55755920171312221f63c27aca05dc7958260420804c51093ae928d8a070af33286fb117defdc6d0d0094c847
7
- data.tar.gz: 2fc1a2c2bd6dfa40ab570d3c9447f02414134640a5ed6364d54285254f070e8d65e0af10fc4d8775583361c0e1e4d296a08e6e1d8f2f41dbcefb04d7d312bb69
6
+ metadata.gz: 56f72325d2e5420551029a13e2a1d7fd20a5eba658e3fa84998c5fc413cea15b87ea718839c309d7b6d545ba31b8f5ebdb548f0e475fe53a3bc798c0a716adf6
7
+ data.tar.gz: cab1730447f4ffc096e06c35d8d71b4b018a20ffc78414db896af76fb4290b41122174874d0a1b508f00609db8d3275c473c0de36c3d43c32bff598fddc1b269
@@ -6,9 +6,7 @@ require 'google/protobuf'
6
6
 
7
7
  require 'google/api/annotations_pb'
8
8
  require 'google/longrunning/operations_pb'
9
- require 'google/protobuf/any_pb'
10
9
  require 'google/protobuf/duration_pb'
11
- require 'google/protobuf/empty_pb'
12
10
  require 'google/protobuf/timestamp_pb'
13
11
  require 'google/rpc/status_pb'
14
12
  Google::Protobuf::DescriptorPool.generated_pool.build do
@@ -42,6 +40,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
42
40
  repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
43
41
  optional :enable_word_time_offsets, :bool, 8
44
42
  optional :enable_automatic_punctuation, :bool, 11
43
+ optional :metadata, :message, 9, "google.cloud.speech.v1.RecognitionMetadata"
45
44
  optional :model, :string, 13
46
45
  optional :use_enhanced, :bool, 14
47
46
  end
@@ -55,6 +54,47 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
55
54
  value :OGG_OPUS, 6
56
55
  value :SPEEX_WITH_HEADER_BYTE, 7
57
56
  end
57
+ add_message "google.cloud.speech.v1.RecognitionMetadata" do
58
+ optional :interaction_type, :enum, 1, "google.cloud.speech.v1.RecognitionMetadata.InteractionType"
59
+ optional :industry_naics_code_of_audio, :uint32, 3
60
+ optional :microphone_distance, :enum, 4, "google.cloud.speech.v1.RecognitionMetadata.MicrophoneDistance"
61
+ optional :original_media_type, :enum, 5, "google.cloud.speech.v1.RecognitionMetadata.OriginalMediaType"
62
+ optional :recording_device_type, :enum, 6, "google.cloud.speech.v1.RecognitionMetadata.RecordingDeviceType"
63
+ optional :recording_device_name, :string, 7
64
+ optional :original_mime_type, :string, 8
65
+ optional :audio_topic, :string, 10
66
+ end
67
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.InteractionType" do
68
+ value :INTERACTION_TYPE_UNSPECIFIED, 0
69
+ value :DISCUSSION, 1
70
+ value :PRESENTATION, 2
71
+ value :PHONE_CALL, 3
72
+ value :VOICEMAIL, 4
73
+ value :PROFESSIONALLY_PRODUCED, 5
74
+ value :VOICE_SEARCH, 6
75
+ value :VOICE_COMMAND, 7
76
+ value :DICTATION, 8
77
+ end
78
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.MicrophoneDistance" do
79
+ value :MICROPHONE_DISTANCE_UNSPECIFIED, 0
80
+ value :NEARFIELD, 1
81
+ value :MIDFIELD, 2
82
+ value :FARFIELD, 3
83
+ end
84
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.OriginalMediaType" do
85
+ value :ORIGINAL_MEDIA_TYPE_UNSPECIFIED, 0
86
+ value :AUDIO, 1
87
+ value :VIDEO, 2
88
+ end
89
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.RecordingDeviceType" do
90
+ value :RECORDING_DEVICE_TYPE_UNSPECIFIED, 0
91
+ value :SMARTPHONE, 1
92
+ value :PC, 2
93
+ value :PHONE_LINE, 3
94
+ value :VEHICLE, 4
95
+ value :OTHER_OUTDOOR_DEVICE, 5
96
+ value :OTHER_INDOOR_DEVICE, 6
97
+ end
58
98
  add_message "google.cloud.speech.v1.SpeechContext" do
59
99
  repeated :phrases, :string, 1
60
100
  end
@@ -88,7 +128,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
88
128
  repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
89
129
  optional :is_final, :bool, 2
90
130
  optional :stability, :float, 3
131
+ optional :result_end_time, :message, 4, "google.protobuf.Duration"
91
132
  optional :channel_tag, :int32, 5
133
+ optional :language_code, :string, 6
92
134
  end
93
135
  add_message "google.cloud.speech.v1.SpeechRecognitionResult" do
94
136
  repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
@@ -116,6 +158,11 @@ module Google
116
158
  StreamingRecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
117
159
  RecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
118
160
  RecognitionConfig::AudioEncoding = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
161
+ RecognitionMetadata = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata").msgclass
162
+ RecognitionMetadata::InteractionType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.InteractionType").enummodule
163
+ RecognitionMetadata::MicrophoneDistance = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.MicrophoneDistance").enummodule
164
+ RecognitionMetadata::OriginalMediaType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.OriginalMediaType").enummodule
165
+ RecognitionMetadata::RecordingDeviceType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.RecordingDeviceType").enummodule
119
166
  SpeechContext = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechContext").msgclass
120
167
  RecognitionAudio = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionAudio").msgclass
121
168
  RecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognizeResponse").msgclass
@@ -165,6 +165,9 @@ module Google
165
165
  # Note: This is currently offered as an experimental service, complimentary
166
166
  # to all users. In the future this may be exclusively available as a
167
167
  # premium feature.
168
+ # @!attribute [rw] metadata
169
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata]
170
+ # *Optional* Metadata regarding this request.
168
171
  # @!attribute [rw] model
169
172
  # @return [String]
170
173
  # *Optional* Which model to select for the given request. Select the model
@@ -284,6 +287,133 @@ module Google
284
287
  end
285
288
  end
286
289
 
290
+ # Description of audio data to be recognized.
291
+ # @!attribute [rw] interaction_type
292
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::InteractionType]
293
+ # The use case most closely describing the audio content to be recognized.
294
+ # @!attribute [rw] industry_naics_code_of_audio
295
+ # @return [Integer]
296
+ # The industry vertical to which this speech recognition request most
297
+ # closely applies. This is most indicative of the topics contained
298
+ # in the audio. Use the 6-digit NAICS code to identify the industry
299
+ # vertical - see https://www.naics.com/search/.
300
+ # @!attribute [rw] microphone_distance
301
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::MicrophoneDistance]
302
+ # The audio type that most closely describes the audio being recognized.
303
+ # @!attribute [rw] original_media_type
304
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::OriginalMediaType]
305
+ # The original media the speech was recorded on.
306
+ # @!attribute [rw] recording_device_type
307
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::RecordingDeviceType]
308
+ # The type of device the speech was recorded with.
309
+ # @!attribute [rw] recording_device_name
310
+ # @return [String]
311
+ # The device used to make the recording. Examples 'Nexus 5X' or
312
+ # 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or
313
+ # 'Cardioid Microphone'.
314
+ # @!attribute [rw] original_mime_type
315
+ # @return [String]
316
+ # Mime type of the original audio file. For example `audio/m4a`,
317
+ # `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
318
+ # A list of possible audio mime types is maintained at
319
+ # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
320
+ # @!attribute [rw] audio_topic
321
+ # @return [String]
322
+ # Description of the content. Eg. "Recordings of federal supreme court
323
+ # hearings from 2012".
324
+ class RecognitionMetadata
325
+ # Use case categories that the audio recognition request can be described
326
+ # by.
327
+ module InteractionType
328
+ # Use case is either unknown or is something other than one of the other
329
+ # values below.
330
+ INTERACTION_TYPE_UNSPECIFIED = 0
331
+
332
+ # Multiple people in a conversation or discussion. For example in a
333
+ # meeting with two or more people actively participating. Typically
334
+ # all the primary people speaking would be in the same room (if not,
335
+ # see PHONE_CALL)
336
+ DISCUSSION = 1
337
+
338
+ # One or more persons lecturing or presenting to others, mostly
339
+ # uninterrupted.
340
+ PRESENTATION = 2
341
+
342
+ # A phone-call or video-conference in which two or more people, who are
343
+ # not in the same room, are actively participating.
344
+ PHONE_CALL = 3
345
+
346
+ # A recorded message intended for another person to listen to.
347
+ VOICEMAIL = 4
348
+
349
+ # Professionally produced audio (eg. TV Show, Podcast).
350
+ PROFESSIONALLY_PRODUCED = 5
351
+
352
+ # Transcribe spoken questions and queries into text.
353
+ VOICE_SEARCH = 6
354
+
355
+ # Transcribe voice commands, such as for controlling a device.
356
+ VOICE_COMMAND = 7
357
+
358
+ # Transcribe speech to text to create a written document, such as a
359
+ # text-message, email or report.
360
+ DICTATION = 8
361
+ end
362
+
363
+ # Enumerates the types of capture settings describing an audio file.
364
+ module MicrophoneDistance
365
+ # Audio type is not known.
366
+ MICROPHONE_DISTANCE_UNSPECIFIED = 0
367
+
368
+ # The audio was captured from a closely placed microphone. Eg. phone,
369
+ # dictaphone, or handheld microphone. Generally if there speaker is within
370
+ # 1 meter of the microphone.
371
+ NEARFIELD = 1
372
+
373
+ # The speaker if within 3 meters of the microphone.
374
+ MIDFIELD = 2
375
+
376
+ # The speaker is more than 3 meters away from the microphone.
377
+ FARFIELD = 3
378
+ end
379
+
380
+ # The original media the speech was recorded on.
381
+ module OriginalMediaType
382
+ # Unknown original media type.
383
+ ORIGINAL_MEDIA_TYPE_UNSPECIFIED = 0
384
+
385
+ # The speech data is an audio recording.
386
+ AUDIO = 1
387
+
388
+ # The speech data originally recorded on a video.
389
+ VIDEO = 2
390
+ end
391
+
392
+ # The type of device the speech was recorded with.
393
+ module RecordingDeviceType
394
+ # The recording device is unknown.
395
+ RECORDING_DEVICE_TYPE_UNSPECIFIED = 0
396
+
397
+ # Speech was recorded on a smartphone.
398
+ SMARTPHONE = 1
399
+
400
+ # Speech was recorded using a personal computer or tablet.
401
+ PC = 2
402
+
403
+ # Speech was recorded over a phone line.
404
+ PHONE_LINE = 3
405
+
406
+ # Speech was recorded in a vehicle.
407
+ VEHICLE = 4
408
+
409
+ # Speech was recorded outdoors.
410
+ OTHER_OUTDOOR_DEVICE = 5
411
+
412
+ # Speech was recorded indoors.
413
+ OTHER_INDOOR_DEVICE = 6
414
+ end
415
+ end
416
+
287
417
  # Provides "hints" to the speech recognizer to favor specific words and phrases
288
418
  # in the results.
289
419
  # @!attribute [rw] phrases
@@ -453,11 +583,21 @@ module Google
453
583
  # (completely unstable) to 1.0 (completely stable).
454
584
  # This field is only provided for interim results (`is_final=false`).
455
585
  # The default of 0.0 is a sentinel value indicating `stability` was not set.
586
+ # @!attribute [rw] result_end_time
587
+ # @return [Google::Protobuf::Duration]
588
+ # Output only. Time offset of the end of this result relative to the
589
+ # beginning of the audio.
456
590
  # @!attribute [rw] channel_tag
457
591
  # @return [Integer]
458
592
  # For multi-channel audio, this is the channel number corresponding to the
459
593
  # recognized result for the audio from that channel.
460
594
  # For audio_channel_count = N, its output values can range from '1' to 'N'.
595
+ # @!attribute [rw] language_code
596
+ # @return [String]
597
+ # Output only. The
598
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
599
+ # language in this result. This language code was detected to have the most
600
+ # likelihood of being spoken in the audio.
461
601
  class StreamingRecognitionResult; end
462
602
 
463
603
  # A speech recognition result corresponding to a portion of the audio.
@@ -29,6 +29,7 @@ require "google/longrunning/operations_client"
29
29
 
30
30
  require "google/cloud/speech/v1/cloud_speech_pb"
31
31
  require "google/cloud/speech/v1/credentials"
32
+ require "google/cloud/speech/version"
32
33
 
33
34
  module Google
34
35
  module Cloud
@@ -136,7 +137,7 @@ module Google
136
137
  updater_proc = credentials.updater_proc
137
138
  end
138
139
 
139
- package_version = Gem.loaded_specs['google-cloud-speech'].version.version
140
+ package_version = Google::Cloud::Speech::VERSION
140
141
 
141
142
  google_api_client = "gl-ruby/#{RUBY_VERSION}"
142
143
  google_api_client << " #{lib_name}/#{lib_version}" if lib_name
@@ -59,6 +59,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
59
59
  value :AMR_WB, 5
60
60
  value :OGG_OPUS, 6
61
61
  value :SPEEX_WITH_HEADER_BYTE, 7
62
+ value :MP3, 8
62
63
  end
63
64
  add_message "google.cloud.speech.v1p1beta1.RecognitionMetadata" do
64
65
  optional :interaction_type, :enum, 1, "google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType"
@@ -104,6 +105,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
104
105
  end
105
106
  add_message "google.cloud.speech.v1p1beta1.SpeechContext" do
106
107
  repeated :phrases, :string, 1
108
+ optional :boost, :float, 4
107
109
  end
108
110
  add_message "google.cloud.speech.v1p1beta1.RecognitionAudio" do
109
111
  oneof :audio_source do
@@ -321,6 +321,11 @@ module Google
321
321
  # is replaced with a single byte containing the block length. Only Speex
322
322
  # wideband is supported. `sample_rate_hertz` must be 16000.
323
323
  SPEEX_WITH_HEADER_BYTE = 7
324
+
325
+ # MP3 audio. Support all standard MP3 bitrates (which range from 32-320
326
+ # kbps). When using this encoding, `sample_rate_hertz` can be optionally
327
+ # unset if not known.
328
+ MP3 = 8
324
329
  end
325
330
  end
326
331
 
@@ -465,6 +470,22 @@ module Google
465
470
  # specific commands are typically spoken by the user. This can also be used
466
471
  # to add additional words to the vocabulary of the recognizer. See
467
472
  # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
473
+ #
474
+ # List items can also be set to classes for groups of words that represent
475
+ # common concepts that occur in natural language. For example, rather than
476
+ # providing phrase hints for every month of the year, using the $MONTH class
477
+ # improves the likelihood of correctly transcribing audio that includes
478
+ # months.
479
+ # @!attribute [rw] boost
480
+ # @return [Float]
481
+ # Hint Boost. Positive value will increase the probability that a specific
482
+ # phrase will be recognized over other similar sounding phrases. The higher
483
+ # the boost, the higher the chance of false positive recognition as well.
484
+ # Negative boost values would correspond to anti-biasing. Anti-biasing is not
485
+ # enabled, so negative boost will simply be ignored. Though `boost` can
486
+ # accept a wide range of positive values, most use cases are best served with
487
+ # values between 0 and 20. We recommend using a binary search approach to
488
+ # finding the optimal value for your use case.
468
489
  class SpeechContext; end
469
490
 
470
491
  # Contains audio data in the encoding specified in the `RecognitionConfig`.
@@ -29,6 +29,7 @@ require "google/longrunning/operations_client"
29
29
 
30
30
  require "google/cloud/speech/v1p1beta1/cloud_speech_pb"
31
31
  require "google/cloud/speech/v1p1beta1/credentials"
32
+ require "google/cloud/speech/version"
32
33
 
33
34
  module Google
34
35
  module Cloud
@@ -136,7 +137,7 @@ module Google
136
137
  updater_proc = credentials.updater_proc
137
138
  end
138
139
 
139
- package_version = Gem.loaded_specs['google-cloud-speech'].version.version
140
+ package_version = Google::Cloud::Speech::VERSION
140
141
 
141
142
  google_api_client = "gl-ruby/#{RUBY_VERSION}"
142
143
  google_api_client << " #{lib_name}/#{lib_version}" if lib_name
@@ -0,0 +1,22 @@
1
+ # Copyright 2019 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Speech
19
+ VERSION = "0.35.0".freeze
20
+ end
21
+ end
22
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.34.1
4
+ version: 0.35.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Google LLC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-30 00:00:00.000000000 Z
11
+ date: 2019-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: google-gax
@@ -131,6 +131,7 @@ files:
131
131
  - lib/google/cloud/speech/v1p1beta1/speech_client.rb
132
132
  - lib/google/cloud/speech/v1p1beta1/speech_client_config.json
133
133
  - lib/google/cloud/speech/v1p1beta1/stream.rb
134
+ - lib/google/cloud/speech/version.rb
134
135
  homepage: https://github.com/googleapis/googleapis
135
136
  licenses:
136
137
  - Apache-2.0