google-cloud-speech 0.34.1 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 363ea2b2a855f3a5721f14eaba4ebaea0c474912996d8777327a5593d58fb25a
4
- data.tar.gz: 70445356ae04fe3a805efc76269d17269275d0ae4bc0e19ecc5915a63a2a5abe
3
+ metadata.gz: 5a486e59964640a724d7a70449004707ce610868a9d6776783758b8cefe1d01b
4
+ data.tar.gz: 0db54dd44e394919bc4f20d41f0570d52c8f543a136762e34700ef7daed98730
5
5
  SHA512:
6
- metadata.gz: e9203cea2fc78d9d8cf7dd3d98ecad2e34c9e8e55755920171312221f63c27aca05dc7958260420804c51093ae928d8a070af33286fb117defdc6d0d0094c847
7
- data.tar.gz: 2fc1a2c2bd6dfa40ab570d3c9447f02414134640a5ed6364d54285254f070e8d65e0af10fc4d8775583361c0e1e4d296a08e6e1d8f2f41dbcefb04d7d312bb69
6
+ metadata.gz: 56f72325d2e5420551029a13e2a1d7fd20a5eba658e3fa84998c5fc413cea15b87ea718839c309d7b6d545ba31b8f5ebdb548f0e475fe53a3bc798c0a716adf6
7
+ data.tar.gz: cab1730447f4ffc096e06c35d8d71b4b018a20ffc78414db896af76fb4290b41122174874d0a1b508f00609db8d3275c473c0de36c3d43c32bff598fddc1b269
@@ -6,9 +6,7 @@ require 'google/protobuf'
6
6
 
7
7
  require 'google/api/annotations_pb'
8
8
  require 'google/longrunning/operations_pb'
9
- require 'google/protobuf/any_pb'
10
9
  require 'google/protobuf/duration_pb'
11
- require 'google/protobuf/empty_pb'
12
10
  require 'google/protobuf/timestamp_pb'
13
11
  require 'google/rpc/status_pb'
14
12
  Google::Protobuf::DescriptorPool.generated_pool.build do
@@ -42,6 +40,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
42
40
  repeated :speech_contexts, :message, 6, "google.cloud.speech.v1.SpeechContext"
43
41
  optional :enable_word_time_offsets, :bool, 8
44
42
  optional :enable_automatic_punctuation, :bool, 11
43
+ optional :metadata, :message, 9, "google.cloud.speech.v1.RecognitionMetadata"
45
44
  optional :model, :string, 13
46
45
  optional :use_enhanced, :bool, 14
47
46
  end
@@ -55,6 +54,47 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
55
54
  value :OGG_OPUS, 6
56
55
  value :SPEEX_WITH_HEADER_BYTE, 7
57
56
  end
57
+ add_message "google.cloud.speech.v1.RecognitionMetadata" do
58
+ optional :interaction_type, :enum, 1, "google.cloud.speech.v1.RecognitionMetadata.InteractionType"
59
+ optional :industry_naics_code_of_audio, :uint32, 3
60
+ optional :microphone_distance, :enum, 4, "google.cloud.speech.v1.RecognitionMetadata.MicrophoneDistance"
61
+ optional :original_media_type, :enum, 5, "google.cloud.speech.v1.RecognitionMetadata.OriginalMediaType"
62
+ optional :recording_device_type, :enum, 6, "google.cloud.speech.v1.RecognitionMetadata.RecordingDeviceType"
63
+ optional :recording_device_name, :string, 7
64
+ optional :original_mime_type, :string, 8
65
+ optional :audio_topic, :string, 10
66
+ end
67
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.InteractionType" do
68
+ value :INTERACTION_TYPE_UNSPECIFIED, 0
69
+ value :DISCUSSION, 1
70
+ value :PRESENTATION, 2
71
+ value :PHONE_CALL, 3
72
+ value :VOICEMAIL, 4
73
+ value :PROFESSIONALLY_PRODUCED, 5
74
+ value :VOICE_SEARCH, 6
75
+ value :VOICE_COMMAND, 7
76
+ value :DICTATION, 8
77
+ end
78
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.MicrophoneDistance" do
79
+ value :MICROPHONE_DISTANCE_UNSPECIFIED, 0
80
+ value :NEARFIELD, 1
81
+ value :MIDFIELD, 2
82
+ value :FARFIELD, 3
83
+ end
84
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.OriginalMediaType" do
85
+ value :ORIGINAL_MEDIA_TYPE_UNSPECIFIED, 0
86
+ value :AUDIO, 1
87
+ value :VIDEO, 2
88
+ end
89
+ add_enum "google.cloud.speech.v1.RecognitionMetadata.RecordingDeviceType" do
90
+ value :RECORDING_DEVICE_TYPE_UNSPECIFIED, 0
91
+ value :SMARTPHONE, 1
92
+ value :PC, 2
93
+ value :PHONE_LINE, 3
94
+ value :VEHICLE, 4
95
+ value :OTHER_OUTDOOR_DEVICE, 5
96
+ value :OTHER_INDOOR_DEVICE, 6
97
+ end
58
98
  add_message "google.cloud.speech.v1.SpeechContext" do
59
99
  repeated :phrases, :string, 1
60
100
  end
@@ -88,7 +128,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
88
128
  repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
89
129
  optional :is_final, :bool, 2
90
130
  optional :stability, :float, 3
131
+ optional :result_end_time, :message, 4, "google.protobuf.Duration"
91
132
  optional :channel_tag, :int32, 5
133
+ optional :language_code, :string, 6
92
134
  end
93
135
  add_message "google.cloud.speech.v1.SpeechRecognitionResult" do
94
136
  repeated :alternatives, :message, 1, "google.cloud.speech.v1.SpeechRecognitionAlternative"
@@ -116,6 +158,11 @@ module Google
116
158
  StreamingRecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.StreamingRecognitionConfig").msgclass
117
159
  RecognitionConfig = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig").msgclass
118
160
  RecognitionConfig::AudioEncoding = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionConfig.AudioEncoding").enummodule
161
+ RecognitionMetadata = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata").msgclass
162
+ RecognitionMetadata::InteractionType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.InteractionType").enummodule
163
+ RecognitionMetadata::MicrophoneDistance = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.MicrophoneDistance").enummodule
164
+ RecognitionMetadata::OriginalMediaType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.OriginalMediaType").enummodule
165
+ RecognitionMetadata::RecordingDeviceType = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionMetadata.RecordingDeviceType").enummodule
119
166
  SpeechContext = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.SpeechContext").msgclass
120
167
  RecognitionAudio = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognitionAudio").msgclass
121
168
  RecognizeResponse = Google::Protobuf::DescriptorPool.generated_pool.lookup("google.cloud.speech.v1.RecognizeResponse").msgclass
@@ -165,6 +165,9 @@ module Google
165
165
  # Note: This is currently offered as an experimental service, complimentary
166
166
  # to all users. In the future this may be exclusively available as a
167
167
  # premium feature.
168
+ # @!attribute [rw] metadata
169
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata]
170
+ # *Optional* Metadata regarding this request.
168
171
  # @!attribute [rw] model
169
172
  # @return [String]
170
173
  # *Optional* Which model to select for the given request. Select the model
@@ -284,6 +287,133 @@ module Google
284
287
  end
285
288
  end
286
289
 
290
+ # Description of audio data to be recognized.
291
+ # @!attribute [rw] interaction_type
292
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::InteractionType]
293
+ # The use case most closely describing the audio content to be recognized.
294
+ # @!attribute [rw] industry_naics_code_of_audio
295
+ # @return [Integer]
296
+ # The industry vertical to which this speech recognition request most
297
+ # closely applies. This is most indicative of the topics contained
298
+ # in the audio. Use the 6-digit NAICS code to identify the industry
299
+ # vertical - see https://www.naics.com/search/.
300
+ # @!attribute [rw] microphone_distance
301
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::MicrophoneDistance]
302
+ # The audio type that most closely describes the audio being recognized.
303
+ # @!attribute [rw] original_media_type
304
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::OriginalMediaType]
305
+ # The original media the speech was recorded on.
306
+ # @!attribute [rw] recording_device_type
307
+ # @return [Google::Cloud::Speech::V1::RecognitionMetadata::RecordingDeviceType]
308
+ # The type of device the speech was recorded with.
309
+ # @!attribute [rw] recording_device_name
310
+ # @return [String]
311
+ # The device used to make the recording. Examples: 'Nexus 5X' or
312
+ # 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or
313
+ # 'Cardioid Microphone'.
314
+ # @!attribute [rw] original_mime_type
315
+ # @return [String]
316
+ # Mime type of the original audio file. For example `audio/m4a`,
317
+ # `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
318
+ # A list of possible audio mime types is maintained at
319
+ # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
320
+ # @!attribute [rw] audio_topic
321
+ # @return [String]
322
+ # Description of the content. Eg. "Recordings of federal supreme court
323
+ # hearings from 2012".
324
+ class RecognitionMetadata
325
+ # Use case categories that the audio recognition request can be described
326
+ # by.
327
+ module InteractionType
328
+ # Use case is either unknown or is something other than one of the other
329
+ # values below.
330
+ INTERACTION_TYPE_UNSPECIFIED = 0
331
+
332
+ # Multiple people in a conversation or discussion. For example in a
333
+ # meeting with two or more people actively participating. Typically
334
+ # all the primary people speaking would be in the same room (if not,
335
+ # see PHONE_CALL)
336
+ DISCUSSION = 1
337
+
338
+ # One or more persons lecturing or presenting to others, mostly
339
+ # uninterrupted.
340
+ PRESENTATION = 2
341
+
342
+ # A phone-call or video-conference in which two or more people, who are
343
+ # not in the same room, are actively participating.
344
+ PHONE_CALL = 3
345
+
346
+ # A recorded message intended for another person to listen to.
347
+ VOICEMAIL = 4
348
+
349
+ # Professionally produced audio (eg. TV Show, Podcast).
350
+ PROFESSIONALLY_PRODUCED = 5
351
+
352
+ # Transcribe spoken questions and queries into text.
353
+ VOICE_SEARCH = 6
354
+
355
+ # Transcribe voice commands, such as for controlling a device.
356
+ VOICE_COMMAND = 7
357
+
358
+ # Transcribe speech to text to create a written document, such as a
359
+ # text-message, email or report.
360
+ DICTATION = 8
361
+ end
362
+
363
+ # Enumerates the types of capture settings describing an audio file.
364
+ module MicrophoneDistance
365
+ # Audio type is not known.
366
+ MICROPHONE_DISTANCE_UNSPECIFIED = 0
367
+
368
+ # The audio was captured from a closely placed microphone. Eg. phone,
369
+ # dictaphone, or handheld microphone. Generally if the speaker is within
370
+ # 1 meter of the microphone.
371
+ NEARFIELD = 1
372
+
373
+ # The speaker is within 3 meters of the microphone.
374
+ MIDFIELD = 2
375
+
376
+ # The speaker is more than 3 meters away from the microphone.
377
+ FARFIELD = 3
378
+ end
379
+
380
+ # The original media the speech was recorded on.
381
+ module OriginalMediaType
382
+ # Unknown original media type.
383
+ ORIGINAL_MEDIA_TYPE_UNSPECIFIED = 0
384
+
385
+ # The speech data is an audio recording.
386
+ AUDIO = 1
387
+
388
+ # The speech data was originally recorded on a video.
389
+ VIDEO = 2
390
+ end
391
+
392
+ # The type of device the speech was recorded with.
393
+ module RecordingDeviceType
394
+ # The recording device is unknown.
395
+ RECORDING_DEVICE_TYPE_UNSPECIFIED = 0
396
+
397
+ # Speech was recorded on a smartphone.
398
+ SMARTPHONE = 1
399
+
400
+ # Speech was recorded using a personal computer or tablet.
401
+ PC = 2
402
+
403
+ # Speech was recorded over a phone line.
404
+ PHONE_LINE = 3
405
+
406
+ # Speech was recorded in a vehicle.
407
+ VEHICLE = 4
408
+
409
+ # Speech was recorded outdoors.
410
+ OTHER_OUTDOOR_DEVICE = 5
411
+
412
+ # Speech was recorded indoors.
413
+ OTHER_INDOOR_DEVICE = 6
414
+ end
415
+ end
416
+
287
417
  # Provides "hints" to the speech recognizer to favor specific words and phrases
288
418
  # in the results.
289
419
  # @!attribute [rw] phrases
@@ -453,11 +583,21 @@ module Google
453
583
  # (completely unstable) to 1.0 (completely stable).
454
584
  # This field is only provided for interim results (`is_final=false`).
455
585
  # The default of 0.0 is a sentinel value indicating `stability` was not set.
586
+ # @!attribute [rw] result_end_time
587
+ # @return [Google::Protobuf::Duration]
588
+ # Output only. Time offset of the end of this result relative to the
589
+ # beginning of the audio.
456
590
  # @!attribute [rw] channel_tag
457
591
  # @return [Integer]
458
592
  # For multi-channel audio, this is the channel number corresponding to the
459
593
  # recognized result for the audio from that channel.
460
594
  # For audio_channel_count = N, its output values can range from '1' to 'N'.
595
+ # @!attribute [rw] language_code
596
+ # @return [String]
597
+ # Output only. The
598
+ # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
599
+ # language in this result. This language code was detected to have the most
600
+ # likelihood of being spoken in the audio.
461
601
  class StreamingRecognitionResult; end
462
602
 
463
603
  # A speech recognition result corresponding to a portion of the audio.
@@ -29,6 +29,7 @@ require "google/longrunning/operations_client"
29
29
 
30
30
  require "google/cloud/speech/v1/cloud_speech_pb"
31
31
  require "google/cloud/speech/v1/credentials"
32
+ require "google/cloud/speech/version"
32
33
 
33
34
  module Google
34
35
  module Cloud
@@ -136,7 +137,7 @@ module Google
136
137
  updater_proc = credentials.updater_proc
137
138
  end
138
139
 
139
- package_version = Gem.loaded_specs['google-cloud-speech'].version.version
140
+ package_version = Google::Cloud::Speech::VERSION
140
141
 
141
142
  google_api_client = "gl-ruby/#{RUBY_VERSION}"
142
143
  google_api_client << " #{lib_name}/#{lib_version}" if lib_name
@@ -59,6 +59,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
59
59
  value :AMR_WB, 5
60
60
  value :OGG_OPUS, 6
61
61
  value :SPEEX_WITH_HEADER_BYTE, 7
62
+ value :MP3, 8
62
63
  end
63
64
  add_message "google.cloud.speech.v1p1beta1.RecognitionMetadata" do
64
65
  optional :interaction_type, :enum, 1, "google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType"
@@ -104,6 +105,7 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
104
105
  end
105
106
  add_message "google.cloud.speech.v1p1beta1.SpeechContext" do
106
107
  repeated :phrases, :string, 1
108
+ optional :boost, :float, 4
107
109
  end
108
110
  add_message "google.cloud.speech.v1p1beta1.RecognitionAudio" do
109
111
  oneof :audio_source do
@@ -321,6 +321,11 @@ module Google
321
321
  # is replaced with a single byte containing the block length. Only Speex
322
322
  # wideband is supported. `sample_rate_hertz` must be 16000.
323
323
  SPEEX_WITH_HEADER_BYTE = 7
324
+
325
+ # MP3 audio. Supports all standard MP3 bitrates (which range from 32-320
326
+ # kbps). When using this encoding, `sample_rate_hertz` can be optionally
327
+ # unset if not known.
328
+ MP3 = 8
324
329
  end
325
330
  end
326
331
 
@@ -465,6 +470,22 @@ module Google
465
470
  # specific commands are typically spoken by the user. This can also be used
466
471
  # to add additional words to the vocabulary of the recognizer. See
467
472
  # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
473
+ #
474
+ # List items can also be set to classes for groups of words that represent
475
+ # common concepts that occur in natural language. For example, rather than
476
+ # providing phrase hints for every month of the year, using the $MONTH class
477
+ # improves the likelihood of correctly transcribing audio that includes
478
+ # months.
479
+ # @!attribute [rw] boost
480
+ # @return [Float]
481
+ # Hint Boost. Positive value will increase the probability that a specific
482
+ # phrase will be recognized over other similar sounding phrases. The higher
483
+ # the boost, the higher the chance of false positive recognition as well.
484
+ # Negative boost values would correspond to anti-biasing. Anti-biasing is not
485
+ # enabled, so negative boost will simply be ignored. Though `boost` can
486
+ # accept a wide range of positive values, most use cases are best served with
487
+ # values between 0 and 20. We recommend using a binary search approach to
488
+ # finding the optimal value for your use case.
468
489
  class SpeechContext; end
469
490
 
470
491
  # Contains audio data in the encoding specified in the `RecognitionConfig`.
@@ -29,6 +29,7 @@ require "google/longrunning/operations_client"
29
29
 
30
30
  require "google/cloud/speech/v1p1beta1/cloud_speech_pb"
31
31
  require "google/cloud/speech/v1p1beta1/credentials"
32
+ require "google/cloud/speech/version"
32
33
 
33
34
  module Google
34
35
  module Cloud
@@ -136,7 +137,7 @@ module Google
136
137
  updater_proc = credentials.updater_proc
137
138
  end
138
139
 
139
- package_version = Gem.loaded_specs['google-cloud-speech'].version.version
140
+ package_version = Google::Cloud::Speech::VERSION
140
141
 
141
142
  google_api_client = "gl-ruby/#{RUBY_VERSION}"
142
143
  google_api_client << " #{lib_name}/#{lib_version}" if lib_name
@@ -0,0 +1,22 @@
1
+ # Copyright 2019 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Speech
19
+ VERSION = "0.35.0".freeze
20
+ end
21
+ end
22
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.34.1
4
+ version: 0.35.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Google LLC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-30 00:00:00.000000000 Z
11
+ date: 2019-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: google-gax
@@ -131,6 +131,7 @@ files:
131
131
  - lib/google/cloud/speech/v1p1beta1/speech_client.rb
132
132
  - lib/google/cloud/speech/v1p1beta1/speech_client_config.json
133
133
  - lib/google/cloud/speech/v1p1beta1/stream.rb
134
+ - lib/google/cloud/speech/version.rb
134
135
  homepage: https://github.com/googleapis/googleapis
135
136
  licenses:
136
137
  - Apache-2.0