google-cloud-text_to_speech-v1beta1 0.20.1 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google/cloud/text_to_speech/v1beta1/text_to_speech/client.rb +1 -1
- data/lib/google/cloud/text_to_speech/v1beta1/version.rb +1 -1
- data/lib/google/cloud/texttospeech/v1beta1/cloud_tts_pb.rb +1 -1
- data/lib/google/cloud/texttospeech/v1beta1/cloud_tts_services_pb.rb +1 -1
- data/proto_docs/google/cloud/texttospeech/v1beta1/cloud_tts.rb +94 -31
- metadata +7 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3ef96f3b0a956f4b46e0f86dd457dcdd3cef1bfd9f4322980de69636b04b34f
|
4
|
+
data.tar.gz: 914a4f70ad565f4d751b8a592eb776139fa35225fb00bd203f46cc1c83e85532
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 185a07cf162ace4e9384c87d909b2fc69a7eed334c2f176a239f772a8de69cebed6d790919747e146c45b6aec46fdf779c1442fea938179133d772a83c9b95b6
|
7
|
+
data.tar.gz: 60a1c0fe780bc38f414f3f2623e77bed9eb6ce867be432c5a5c63cfe478db0d1673776991fc17b3b70851f20e30afbffe609c281d62fa1c49491d5ef6e40addc
|
@@ -359,7 +359,7 @@ module Google
|
|
359
359
|
end
|
360
360
|
|
361
361
|
##
|
362
|
-
# Performs bidirectional streaming speech synthesis:
|
362
|
+
# Performs bidirectional streaming speech synthesis: receives audio while
|
363
363
|
# sending text.
|
364
364
|
#
|
365
365
|
# @param request [::Gapic::StreamInput, ::Enumerable<::Google::Cloud::TextToSpeech::V1beta1::StreamingSynthesizeRequest, ::Hash>]
|
@@ -10,7 +10,7 @@ require 'google/api/field_behavior_pb'
|
|
10
10
|
require 'google/api/resource_pb'
|
11
11
|
|
12
12
|
|
13
|
-
descriptor_data = "\n1google/cloud/texttospeech/v1beta1/cloud_tts.proto\x12!google.cloud.texttospeech.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\"/\n\x11ListVoicesRequest\x12\x1a\n\rlanguage_code\x18\x01 \x01(\tB\x03\xe0\x41\x01\"N\n\x12ListVoicesResponse\x12\x38\n\x06voices\x18\x01 \x03(\x0b\x32(.google.cloud.texttospeech.v1beta1.Voice\"\x99\x01\n\x05Voice\x12\x16\n\x0elanguage_codes\x18\x01 \x03(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12G\n\x0bssml_gender\x18\x03 \x01(\x0e\x32\x32.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\x12!\n\x19natural_sample_rate_hertz\x18\x04 \x01(\x05\"d\n\x14\x41\x64vancedVoiceOptions\x12*\n\x1dlow_latency_journey_synthesis\x18\x01 \x01(\x08H\x00\x88\x01\x01\x42 \n\x1e_low_latency_journey_synthesis\"\x99\x04\n\x17SynthesizeSpeechRequest\x12\x45\n\x05input\x18\x01 \x01(\x0b\x32\x31.google.cloud.texttospeech.v1beta1.SynthesisInputB\x03\xe0\x41\x02\x12K\n\x05voice\x18\x02 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.VoiceSelectionParamsB\x03\xe0\x41\x02\x12I\n\x0c\x61udio_config\x18\x03 \x01(\x0b\x32..google.cloud.texttospeech.v1beta1.AudioConfigB\x03\xe0\x41\x02\x12\x66\n\x14\x65nable_time_pointing\x18\x04 \x03(\x0e\x32H.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest.TimepointType\x12\\\n\x16\x61\x64vanced_voice_options\x18\x08 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.AdvancedVoiceOptionsH\x00\x88\x01\x01\">\n\rTimepointType\x12\x1e\n\x1aTIMEPOINT_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tSSML_MARK\x10\x01\x42\x19\n\x17_advanced_voice_options\"\
|
13
|
+
descriptor_data = "\n1google/cloud/texttospeech/v1beta1/cloud_tts.proto\x12!google.cloud.texttospeech.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\"/\n\x11ListVoicesRequest\x12\x1a\n\rlanguage_code\x18\x01 \x01(\tB\x03\xe0\x41\x01\"N\n\x12ListVoicesResponse\x12\x38\n\x06voices\x18\x01 \x03(\x0b\x32(.google.cloud.texttospeech.v1beta1.Voice\"\x99\x01\n\x05Voice\x12\x16\n\x0elanguage_codes\x18\x01 \x03(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12G\n\x0bssml_gender\x18\x03 \x01(\x0e\x32\x32.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\x12!\n\x19natural_sample_rate_hertz\x18\x04 \x01(\x05\"d\n\x14\x41\x64vancedVoiceOptions\x12*\n\x1dlow_latency_journey_synthesis\x18\x01 \x01(\x08H\x00\x88\x01\x01\x42 \n\x1e_low_latency_journey_synthesis\"\x99\x04\n\x17SynthesizeSpeechRequest\x12\x45\n\x05input\x18\x01 \x01(\x0b\x32\x31.google.cloud.texttospeech.v1beta1.SynthesisInputB\x03\xe0\x41\x02\x12K\n\x05voice\x18\x02 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.VoiceSelectionParamsB\x03\xe0\x41\x02\x12I\n\x0c\x61udio_config\x18\x03 \x01(\x0b\x32..google.cloud.texttospeech.v1beta1.AudioConfigB\x03\xe0\x41\x02\x12\x66\n\x14\x65nable_time_pointing\x18\x04 \x03(\x0e\x32H.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest.TimepointType\x12\\\n\x16\x61\x64vanced_voice_options\x18\x08 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.AdvancedVoiceOptionsH\x00\x88\x01\x01\">\n\rTimepointType\x12\x1e\n\x1aTIMEPOINT_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tSSML_MARK\x10\x01\x42\x19\n\x17_advanced_voice_options\"\xa7\x03\n\x19\x43ustomPronunciationParams\x12\x13\n\x06phrase\x18\x01 \x01(\tH\x00\x88\x01\x01\x12m\n\x11phonetic_encoding\x18\x02 \x01(\x0e\x32M.google.cloud.texttospeech.v1beta1.CustomPronunciationParams.PhoneticEncodingH\x01\x88\x01\x01\x12\x1a\n\rpronunciation\x18\x03 
\x01(\tH\x02\x88\x01\x01\"\xb6\x01\n\x10PhoneticEncoding\x12!\n\x1dPHONETIC_ENCODING_UNSPECIFIED\x10\x00\x12\x19\n\x15PHONETIC_ENCODING_IPA\x10\x01\x12\x1d\n\x19PHONETIC_ENCODING_X_SAMPA\x10\x02\x12\'\n#PHONETIC_ENCODING_JAPANESE_YOMIGANA\x10\x03\x12\x1c\n\x18PHONETIC_ENCODING_PINYIN\x10\x04\x42\t\n\x07_phraseB\x14\n\x12_phonetic_encodingB\x10\n\x0e_pronunciation\"l\n\x14\x43ustomPronunciations\x12T\n\x0epronunciations\x18\x01 \x03(\x0b\x32<.google.cloud.texttospeech.v1beta1.CustomPronunciationParams\"\x95\x01\n\x12MultiSpeakerMarkup\x12N\n\x05turns\x18\x01 \x03(\x0b\x32:.google.cloud.texttospeech.v1beta1.MultiSpeakerMarkup.TurnB\x03\xe0\x41\x02\x1a/\n\x04Turn\x12\x14\n\x07speaker\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x11\n\x04text\x18\x02 \x01(\tB\x03\xe0\x41\x02\"\x86\x02\n\x0eSynthesisInput\x12\x0e\n\x04text\x18\x01 \x01(\tH\x00\x12\x10\n\x06markup\x18\x05 \x01(\tH\x00\x12\x0e\n\x04ssml\x18\x02 \x01(\tH\x00\x12U\n\x14multi_speaker_markup\x18\x04 \x01(\x0b\x32\x35.google.cloud.texttospeech.v1beta1.MultiSpeakerMarkupH\x00\x12[\n\x15\x63ustom_pronunciations\x18\x03 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.CustomPronunciationsB\x03\xe0\x41\x01\x42\x0e\n\x0cinput_source\"\xbd\x02\n\x14VoiceSelectionParams\x12\x1a\n\rlanguage_code\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\x12G\n\x0bssml_gender\x18\x03 \x01(\x0e\x32\x32.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\x12J\n\x0c\x63ustom_voice\x18\x04 \x01(\x0b\x32\x34.google.cloud.texttospeech.v1beta1.CustomVoiceParams\x12M\n\x0bvoice_clone\x18\x05 \x01(\x0b\x32\x33.google.cloud.texttospeech.v1beta1.VoiceCloneParamsB\x03\xe0\x41\x01\x12\x17\n\nmodel_name\x18\x06 \x01(\tB\x03\xe0\x41\x01\"\xf6\x01\n\x0b\x41udioConfig\x12M\n\x0e\x61udio_encoding\x18\x01 \x01(\x0e\x32\x30.google.cloud.texttospeech.v1beta1.AudioEncodingB\x03\xe0\x41\x02\x12\x1d\n\rspeaking_rate\x18\x02 \x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\x12\x15\n\x05pitch\x18\x03 
\x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\x12\x1e\n\x0evolume_gain_db\x18\x04 \x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\x12\x1e\n\x11sample_rate_hertz\x18\x05 \x01(\x05\x42\x03\xe0\x41\x01\x12\"\n\x12\x65\x66\x66\x65\x63ts_profile_id\x18\x06 \x03(\tB\x06\xe0\x41\x04\xe0\x41\x01\"\xf6\x01\n\x11\x43ustomVoiceParams\x12\x32\n\x05model\x18\x01 \x01(\tB#\xe0\x41\x02\xfa\x41\x1d\n\x1b\x61utoml.googleapis.com/Model\x12\x61\n\x0ereported_usage\x18\x03 \x01(\x0e\x32\x42.google.cloud.texttospeech.v1beta1.CustomVoiceParams.ReportedUsageB\x05\x18\x01\xe0\x41\x01\"J\n\rReportedUsage\x12\x1e\n\x1aREPORTED_USAGE_UNSPECIFIED\x10\x00\x12\x0c\n\x08REALTIME\x10\x01\x12\x0b\n\x07OFFLINE\x10\x02\"2\n\x10VoiceCloneParams\x12\x1e\n\x11voice_cloning_key\x18\x01 \x01(\tB\x03\xe0\x41\x02\"\xb9\x01\n\x18SynthesizeSpeechResponse\x12\x15\n\raudio_content\x18\x01 \x01(\x0c\x12@\n\ntimepoints\x18\x02 \x03(\x0b\x32,.google.cloud.texttospeech.v1beta1.Timepoint\x12\x44\n\x0c\x61udio_config\x18\x04 \x01(\x0b\x32..google.cloud.texttospeech.v1beta1.AudioConfig\"4\n\tTimepoint\x12\x11\n\tmark_name\x18\x04 \x01(\t\x12\x14\n\x0ctime_seconds\x18\x03 \x01(\x01\"\xa4\x01\n\x14StreamingAudioConfig\x12M\n\x0e\x61udio_encoding\x18\x01 \x01(\x0e\x32\x30.google.cloud.texttospeech.v1beta1.AudioEncodingB\x03\xe0\x41\x02\x12\x1e\n\x11sample_rate_hertz\x18\x02 \x01(\x05\x42\x03\xe0\x41\x01\x12\x1d\n\rspeaking_rate\x18\x03 \x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\"\xa3\x02\n\x19StreamingSynthesizeConfig\x12K\n\x05voice\x18\x01 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.VoiceSelectionParamsB\x03\xe0\x41\x02\x12\\\n\x16streaming_audio_config\x18\x04 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.StreamingAudioConfigB\x03\xe0\x41\x01\x12[\n\x15\x63ustom_pronunciations\x18\x05 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.CustomPronunciationsB\x03\xe0\x41\x01\"k\n\x17StreamingSynthesisInput\x12\x0e\n\x04text\x18\x01 \x01(\tH\x00\x12\x10\n\x06markup\x18\x05 \x01(\tH\x00\x12\x13\n\x06prompt\x18\x06 
\x01(\tH\x01\x88\x01\x01\x42\x0e\n\x0cinput_sourceB\t\n\x07_prompt\"\xd8\x01\n\x1aStreamingSynthesizeRequest\x12X\n\x10streaming_config\x18\x01 \x01(\x0b\x32<.google.cloud.texttospeech.v1beta1.StreamingSynthesizeConfigH\x00\x12K\n\x05input\x18\x02 \x01(\x0b\x32:.google.cloud.texttospeech.v1beta1.StreamingSynthesisInputH\x00\x42\x13\n\x11streaming_request\"4\n\x1bStreamingSynthesizeResponse\x12\x15\n\raudio_content\x18\x01 \x01(\x0c*W\n\x0fSsmlVoiceGender\x12!\n\x1dSSML_VOICE_GENDER_UNSPECIFIED\x10\x00\x12\x08\n\x04MALE\x10\x01\x12\n\n\x06\x46\x45MALE\x10\x02\x12\x0b\n\x07NEUTRAL\x10\x03*\x8c\x01\n\rAudioEncoding\x12\x1e\n\x1a\x41UDIO_ENCODING_UNSPECIFIED\x10\x00\x12\x0c\n\x08LINEAR16\x10\x01\x12\x07\n\x03MP3\x10\x02\x12\x0f\n\x0bMP3_64_KBPS\x10\x04\x12\x0c\n\x08OGG_OPUS\x10\x03\x12\t\n\x05MULAW\x10\x05\x12\x08\n\x04\x41LAW\x10\x06\x12\x07\n\x03PCM\x10\x07\x12\x07\n\x03M4A\x10\x08\x32\xef\x04\n\x0cTextToSpeech\x12\xa2\x01\n\nListVoices\x12\x34.google.cloud.texttospeech.v1beta1.ListVoicesRequest\x1a\x35.google.cloud.texttospeech.v1beta1.ListVoicesResponse\"\'\xda\x41\rlanguage_code\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1beta1/voices\x12\xcb\x01\n\x10SynthesizeSpeech\x12:.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest\x1a;.google.cloud.texttospeech.v1beta1.SynthesizeSpeechResponse\">\xda\x41\x18input,voice,audio_config\x82\xd3\xe4\x93\x02\x1d\"\x18/v1beta1/text:synthesize:\x01*\x12\x9a\x01\n\x13StreamingSynthesize\x12=.google.cloud.texttospeech.v1beta1.StreamingSynthesizeRequest\x1a>.google.cloud.texttospeech.v1beta1.StreamingSynthesizeResponse\"\x00(\x01\x30\x01\x1aO\xca\x41\x1btexttospeech.googleapis.com\xd2\x41.https://www.googleapis.com/auth/cloud-platformB\xd5\x02\n%com.google.cloud.texttospeech.v1beta1B\x11TextToSpeechProtoP\x01ZIcloud.google.com/go/texttospeech/apiv1beta1/texttospeechpb;texttospeechpb\xa2\x02\x04\x43TTS\xaa\x02!Google.Cloud.TextToSpeech.V1Beta1\xca\x02!Google\\Cloud\\TextToSpeech\\V1beta1\xea\x02$Google::Cloud::TextToSpeech::V1beta1\xea\x4
1U\n\x1b\x61utoml.googleapis.com/Model\x12\x36projects/{project}/locations/{location}/models/{model}b\x06proto3"
|
14
14
|
|
15
15
|
pool = Google::Protobuf::DescriptorPool.generated_pool
|
16
16
|
|
@@ -38,7 +38,7 @@ module Google
|
|
38
38
|
# Synthesizes speech synchronously: receive results after all text input
|
39
39
|
# has been processed.
|
40
40
|
rpc :SynthesizeSpeech, ::Google::Cloud::TextToSpeech::V1beta1::SynthesizeSpeechRequest, ::Google::Cloud::TextToSpeech::V1beta1::SynthesizeSpeechResponse
|
41
|
-
# Performs bidirectional streaming speech synthesis:
|
41
|
+
# Performs bidirectional streaming speech synthesis: receives audio while
|
42
42
|
# sending text.
|
43
43
|
rpc :StreamingSynthesize, stream(::Google::Cloud::TextToSpeech::V1beta1::StreamingSynthesizeRequest), stream(::Google::Cloud::TextToSpeech::V1beta1::StreamingSynthesizeResponse)
|
44
44
|
end
|
@@ -69,8 +69,8 @@ module Google
|
|
69
69
|
# Used for advanced voice options.
|
70
70
|
# @!attribute [rw] low_latency_journey_synthesis
|
71
71
|
# @return [::Boolean]
|
72
|
-
# Only for Journey voices. If false, the synthesis
|
73
|
-
# and
|
72
|
+
# Only for Journey voices. If false, the synthesis is context aware
|
73
|
+
# and has a higher latency.
|
74
74
|
class AdvancedVoiceOptions
|
75
75
|
include ::Google::Protobuf::MessageExts
|
76
76
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -109,9 +109,9 @@ module Google
|
|
109
109
|
# Pronunciation customization for a phrase.
|
110
110
|
# @!attribute [rw] phrase
|
111
111
|
# @return [::String]
|
112
|
-
# The phrase to which the customization
|
113
|
-
# The phrase can be multiple words
|
114
|
-
#
|
112
|
+
# The phrase to which the customization is applied.
|
113
|
+
# The phrase can be multiple words, such as proper nouns, but shouldn't span
|
114
|
+
# the length of the sentence.
|
115
115
|
# @!attribute [rw] phonetic_encoding
|
116
116
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciationParams::PhoneticEncoding]
|
117
117
|
# The phonetic encoding of the phrase.
|
@@ -128,20 +128,43 @@ module Google
|
|
128
128
|
# Not specified.
|
129
129
|
PHONETIC_ENCODING_UNSPECIFIED = 0
|
130
130
|
|
131
|
-
# IPA
|
131
|
+
# IPA, such as apple -> ˈæpəl.
|
132
132
|
# https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
|
133
133
|
PHONETIC_ENCODING_IPA = 1
|
134
134
|
|
135
|
-
# X-SAMPA
|
135
|
+
# X-SAMPA, such as apple -> "{p@l".
|
136
136
|
# https://en.wikipedia.org/wiki/X-SAMPA
|
137
137
|
PHONETIC_ENCODING_X_SAMPA = 2
|
138
|
+
|
139
|
+
# For reading-to-pron conversion to work well, the `pronunciation` field
|
140
|
+
# should only contain Kanji, Hiragana, and Katakana.
|
141
|
+
#
|
142
|
+
# The pronunciation can also contain pitch accents.
|
143
|
+
# The start of a pitch phrase is specified with `^` and the down-pitch
|
144
|
+
# position is specified with `!`, for example:
|
145
|
+
#
|
146
|
+
# phrase:端 pronunciation:^はし
|
147
|
+
# phrase:箸 pronunciation:^は!し
|
148
|
+
# phrase:橋 pronunciation:^はし!
|
149
|
+
#
|
150
|
+
# We currently only support the Tokyo dialect, which allows at most one
|
151
|
+
# down-pitch per phrase (i.e. at most one `!` between `^`).
|
152
|
+
PHONETIC_ENCODING_JAPANESE_YOMIGANA = 3
|
153
|
+
|
154
|
+
# Used to specify pronunciations for Mandarin words. See
|
155
|
+
# https://en.wikipedia.org/wiki/Pinyin.
|
156
|
+
#
|
157
|
+
# For example: 朝阳, the pronunciation is "chao2 yang2". The number
|
158
|
+
# represents the tone, and there is a space between syllables. Neutral
|
159
|
+
# tones are represented by 5, for example 孩子 "hai2 zi5".
|
160
|
+
PHONETIC_ENCODING_PINYIN = 4
|
138
161
|
end
|
139
162
|
end
|
140
163
|
|
141
164
|
# A collection of pronunciation customizations.
|
142
165
|
# @!attribute [rw] pronunciations
|
143
166
|
# @return [::Array<::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciationParams>]
|
144
|
-
# The pronunciation customizations
|
167
|
+
# The pronunciation customizations are applied.
|
145
168
|
class CustomPronunciations
|
146
169
|
include ::Google::Protobuf::MessageExts
|
147
170
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -155,7 +178,7 @@ module Google
|
|
155
178
|
include ::Google::Protobuf::MessageExts
|
156
179
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
157
180
|
|
158
|
-
# A
|
181
|
+
# A multi-speaker turn.
|
159
182
|
# @!attribute [rw] speaker
|
160
183
|
# @return [::String]
|
161
184
|
# Required. The speaker of the turn, for example, 'O' or 'Q'. Please refer
|
@@ -177,7 +200,13 @@ module Google
|
|
177
200
|
# @return [::String]
|
178
201
|
# The raw text to be synthesized.
|
179
202
|
#
|
180
|
-
# Note: The following fields are mutually exclusive: `text`, `ssml`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
203
|
+
# Note: The following fields are mutually exclusive: `text`, `markup`, `ssml`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
204
|
+
# @!attribute [rw] markup
|
205
|
+
# @return [::String]
|
206
|
+
# Markup for HD voices specifically. This field may not be used with any
|
207
|
+
# other voices.
|
208
|
+
#
|
209
|
+
# Note: The following fields are mutually exclusive: `markup`, `text`, `ssml`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
181
210
|
# @!attribute [rw] ssml
|
182
211
|
# @return [::String]
|
183
212
|
# The SSML document to be synthesized. The SSML document must be valid
|
@@ -186,27 +215,25 @@ module Google
|
|
186
215
|
# more information, see
|
187
216
|
# [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
|
188
217
|
#
|
189
|
-
# Note: The following fields are mutually exclusive: `ssml`, `text`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
218
|
+
# Note: The following fields are mutually exclusive: `ssml`, `text`, `markup`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
190
219
|
# @!attribute [rw] multi_speaker_markup
|
191
220
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::MultiSpeakerMarkup]
|
192
221
|
# The multi-speaker input to be synthesized. Only applicable for
|
193
222
|
# multi-speaker synthesis.
|
194
223
|
#
|
195
|
-
# Note: The following fields are mutually exclusive: `multi_speaker_markup`, `text`, `ssml`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
224
|
+
# Note: The following fields are mutually exclusive: `multi_speaker_markup`, `text`, `markup`, `ssml`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
196
225
|
# @!attribute [rw] custom_pronunciations
|
197
226
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciations]
|
198
|
-
# Optional. The pronunciation customizations
|
199
|
-
# this is set, the input
|
227
|
+
# Optional. The pronunciation customizations are applied to the input. If
|
228
|
+
# this is set, the input is synthesized using the given pronunciation
|
200
229
|
# customizations.
|
201
230
|
#
|
202
|
-
# The initial support
|
203
|
-
#
|
204
|
-
# VoiceSelectionParams. Journey and Instant Clone voices are
|
205
|
-
# not supported yet.
|
231
|
+
# The initial support is for en-us, with plans to expand to other locales in
|
232
|
+
# the future. Instant Clone voices aren't supported.
|
206
233
|
#
|
207
234
|
# In order to customize the pronunciation of a phrase, there must be an exact
|
208
235
|
# match of the phrase in the input types. If using SSML, the phrase must not
|
209
|
-
# be inside a phoneme tag
|
236
|
+
# be inside a phoneme tag.
|
210
237
|
class SynthesisInput
|
211
238
|
include ::Google::Protobuf::MessageExts
|
212
239
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -246,8 +273,12 @@ module Google
|
|
246
273
|
# @!attribute [rw] voice_clone
|
247
274
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::VoiceCloneParams]
|
248
275
|
# Optional. The configuration for a voice clone. If
|
249
|
-
# [VoiceCloneParams.voice_clone_key] is set, the service
|
250
|
-
#
|
276
|
+
# [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
|
277
|
+
# clone matching the specified configuration.
|
278
|
+
# @!attribute [rw] model_name
|
279
|
+
# @return [::String]
|
280
|
+
# Optional. The name of the model. If set, the service will choose the model
|
281
|
+
# matching the specified configuration.
|
251
282
|
class VoiceSelectionParams
|
252
283
|
include ::Google::Protobuf::MessageExts
|
253
284
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -259,10 +290,10 @@ module Google
|
|
259
290
|
# Required. The format of the audio byte stream.
|
260
291
|
# @!attribute [rw] speaking_rate
|
261
292
|
# @return [::Float]
|
262
|
-
# Optional. Input only. Speaking rate/speed, in the range [0.25,
|
293
|
+
# Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
|
263
294
|
# the normal native speed supported by the specific voice. 2.0 is twice as
|
264
295
|
# fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
|
265
|
-
# speed. Any other values < 0.25 or >
|
296
|
+
# speed. Any other values < 0.25 or > 2.0 will return an error.
|
266
297
|
# @!attribute [rw] pitch
|
267
298
|
# @return [::Float]
|
268
299
|
# Optional. Input only. Speaking pitch, in the range [-20.0, 20.0]. 20 means
|
@@ -375,11 +406,17 @@ module Google
|
|
375
406
|
# @!attribute [rw] audio_encoding
|
376
407
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::AudioEncoding]
|
377
408
|
# Required. The format of the audio byte stream.
|
378
|
-
#
|
379
|
-
#
|
409
|
+
# Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
|
410
|
+
# return an error.
|
380
411
|
# @!attribute [rw] sample_rate_hertz
|
381
412
|
# @return [::Integer]
|
382
413
|
# Optional. The synthesis sample rate (in hertz) for this audio.
|
414
|
+
# @!attribute [rw] speaking_rate
|
415
|
+
# @return [::Float]
|
416
|
+
# Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
|
417
|
+
# the normal native speed supported by the specific voice. 2.0 is twice as
|
418
|
+
# fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
|
419
|
+
# speed. Any other values < 0.25 or > 2.0 will return an error.
|
383
420
|
class StreamingAudioConfig
|
384
421
|
include ::Google::Protobuf::MessageExts
|
385
422
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -392,6 +429,18 @@ module Google
|
|
392
429
|
# @!attribute [rw] streaming_audio_config
|
393
430
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::StreamingAudioConfig]
|
394
431
|
# Optional. The configuration of the synthesized audio.
|
432
|
+
# @!attribute [rw] custom_pronunciations
|
433
|
+
# @return [::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciations]
|
434
|
+
# Optional. The pronunciation customizations are applied to the input. If
|
435
|
+
# this is set, the input is synthesized using the given pronunciation
|
436
|
+
# customizations.
|
437
|
+
#
|
438
|
+
# The initial support is for en-us, with plans to expand to other locales in
|
439
|
+
# the future. Instant Clone voices aren't supported.
|
440
|
+
#
|
441
|
+
# In order to customize the pronunciation of a phrase, there must be an exact
|
442
|
+
# match of the phrase in the input types. If using SSML, the phrase must not
|
443
|
+
# be inside a phoneme tag.
|
395
444
|
class StreamingSynthesizeConfig
|
396
445
|
include ::Google::Protobuf::MessageExts
|
397
446
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -401,9 +450,19 @@ module Google
|
|
401
450
|
# @!attribute [rw] text
|
402
451
|
# @return [::String]
|
403
452
|
# The raw text to be synthesized. It is recommended that each input
|
404
|
-
# contains complete, terminating sentences,
|
405
|
-
#
|
406
|
-
#
|
453
|
+
# contains complete, terminating sentences, which results in better prosody
|
454
|
+
# in the output audio.
|
455
|
+
#
|
456
|
+
# Note: The following fields are mutually exclusive: `text`, `markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
457
|
+
# @!attribute [rw] markup
|
458
|
+
# @return [::String]
|
459
|
+
# Markup for HD voices specifically. This field may not be used with any
|
460
|
+
# other voices.
|
461
|
+
#
|
462
|
+
# Note: The following fields are mutually exclusive: `markup`, `text`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
463
|
+
# @!attribute [rw] prompt
|
464
|
+
# @return [::String]
|
465
|
+
# This is system instruction supported only for controllable voice models.
|
407
466
|
class StreamingSynthesisInput
|
408
467
|
include ::Google::Protobuf::MessageExts
|
409
468
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -466,7 +525,8 @@ module Google
|
|
466
525
|
# Configuration to set up audio encoder. The encoding determines the output
|
467
526
|
# audio format that we'd like.
|
468
527
|
module AudioEncoding
|
469
|
-
# Not specified.
|
528
|
+
# Not specified. Only used by GenerateVoiceCloningKey. Otherwise, will return
|
529
|
+
# result
|
470
530
|
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
471
531
|
AUDIO_ENCODING_UNSPECIFIED = 0
|
472
532
|
|
@@ -480,7 +540,7 @@ module Google
|
|
480
540
|
# MP3 at 64kbps.
|
481
541
|
MP3_64_KBPS = 4
|
482
542
|
|
483
|
-
# Opus encoded audio wrapped in an ogg container. The result
|
543
|
+
# Opus encoded audio wrapped in an ogg container. The result is a
|
484
544
|
# file which can be played natively on Android, and in browsers (at least
|
485
545
|
# Chrome and Firefox). The quality of the encoding is considerably higher
|
486
546
|
# than MP3 while using approximately the same bitrate.
|
@@ -495,9 +555,12 @@ module Google
|
|
495
555
|
ALAW = 6
|
496
556
|
|
497
557
|
# Uncompressed 16-bit signed little-endian samples (Linear PCM).
|
498
|
-
# Note that as opposed to LINEAR16, audio
|
558
|
+
# Note that as opposed to LINEAR16, audio won't be wrapped in a WAV (or
|
499
559
|
# any other) header.
|
500
560
|
PCM = 7
|
561
|
+
|
562
|
+
# M4A audio.
|
563
|
+
M4A = 8
|
501
564
|
end
|
502
565
|
end
|
503
566
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-cloud-text_to_speech-v1beta1
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.20.1
|
4
|
+
version: 0.22.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Google LLC
|
@@ -13,22 +13,16 @@ dependencies:
|
|
13
13
|
name: gapic-common
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
|
-
- - "
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: 0.25.0
|
19
|
-
- - "<"
|
16
|
+
- - "~>"
|
20
17
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
18
|
+
version: '1.1'
|
22
19
|
type: :runtime
|
23
20
|
prerelease: false
|
24
21
|
version_requirements: !ruby/object:Gem::Requirement
|
25
22
|
requirements:
|
26
|
-
- - "
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
version: 0.25.0
|
29
|
-
- - "<"
|
23
|
+
- - "~>"
|
30
24
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
25
|
+
version: '1.1'
|
32
26
|
- !ruby/object:Gem::Dependency
|
33
27
|
name: google-cloud-errors
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -105,14 +99,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
105
99
|
requirements:
|
106
100
|
- - ">="
|
107
101
|
- !ruby/object:Gem::Version
|
108
|
-
version: '3.
|
102
|
+
version: '3.1'
|
109
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
104
|
requirements:
|
111
105
|
- - ">="
|
112
106
|
- !ruby/object:Gem::Version
|
113
107
|
version: '0'
|
114
108
|
requirements: []
|
115
|
-
rubygems_version: 3.6.
|
109
|
+
rubygems_version: 3.6.9
|
116
110
|
specification_version: 4
|
117
111
|
summary: Synthesizes natural-sounding speech by applying powerful neural network models.
|
118
112
|
test_files: []
|