google-cloud-text_to_speech-v1beta1 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google/cloud/text_to_speech/v1beta1/text_to_speech/client.rb +1 -1
- data/lib/google/cloud/text_to_speech/v1beta1/version.rb +1 -1
- data/lib/google/cloud/texttospeech/v1beta1/cloud_tts_pb.rb +1 -1
- data/lib/google/cloud/texttospeech/v1beta1/cloud_tts_services_pb.rb +1 -1
- data/proto_docs/google/cloud/texttospeech/v1beta1/cloud_tts.rb +100 -31
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da36d2f04005aeb45395f1e1017cfa2936e3eb58baf6a422509c36c4b1ea329c
|
4
|
+
data.tar.gz: 8a8a9896e4c544b385ce075b10e0c34cd6d29e3f72ce87a462f6573e89bc3556
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8048983c7eb0224c43e5b1675c9f1a5f58f81d80689ac8bd4d91769dfb94a8da08f1a843583ac01334288dda63dc772e736667090358f1fc7fe227e9032a366d
|
7
|
+
data.tar.gz: 10967f02b7bdaaa17da061382e052a75936eaa717d5e56462d58e0c4fa1d7a75dea99dfd7d3372389522ff7b04439c5d800194006c9ccb2be281f8466944681f
|
@@ -359,7 +359,7 @@ module Google
|
|
359
359
|
end
|
360
360
|
|
361
361
|
##
|
362
|
-
# Performs bidirectional streaming speech synthesis:
|
362
|
+
# Performs bidirectional streaming speech synthesis: receives audio while
|
363
363
|
# sending text.
|
364
364
|
#
|
365
365
|
# @param request [::Gapic::StreamInput, ::Enumerable<::Google::Cloud::TextToSpeech::V1beta1::StreamingSynthesizeRequest, ::Hash>]
|
@@ -10,7 +10,7 @@ require 'google/api/field_behavior_pb'
|
|
10
10
|
require 'google/api/resource_pb'
|
11
11
|
|
12
12
|
|
13
|
-
descriptor_data = "\n1google/cloud/texttospeech/v1beta1/cloud_tts.proto\x12!google.cloud.texttospeech.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\"/\n\x11ListVoicesRequest\x12\x1a\n\rlanguage_code\x18\x01 \x01(\tB\x03\xe0\x41\x01\"N\n\x12ListVoicesResponse\x12\x38\n\x06voices\x18\x01 \x03(\x0b\x32(.google.cloud.texttospeech.v1beta1.Voice\"\x99\x01\n\x05Voice\x12\x16\n\x0elanguage_codes\x18\x01 \x03(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12G\n\x0bssml_gender\x18\x03 \x01(\x0e\x32\x32.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\x12!\n\x19natural_sample_rate_hertz\x18\x04 \x01(\x05\"d\n\x14\x41\x64vancedVoiceOptions\x12*\n\x1dlow_latency_journey_synthesis\x18\x01 \x01(\x08H\x00\x88\x01\x01\x42 \n\x1e_low_latency_journey_synthesis\"\x99\x04\n\x17SynthesizeSpeechRequest\x12\x45\n\x05input\x18\x01 \x01(\x0b\x32\x31.google.cloud.texttospeech.v1beta1.SynthesisInputB\x03\xe0\x41\x02\x12K\n\x05voice\x18\x02 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.VoiceSelectionParamsB\x03\xe0\x41\x02\x12I\n\x0c\x61udio_config\x18\x03 \x01(\x0b\x32..google.cloud.texttospeech.v1beta1.AudioConfigB\x03\xe0\x41\x02\x12\x66\n\x14\x65nable_time_pointing\x18\x04 \x03(\x0e\x32H.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest.TimepointType\x12\\\n\x16\x61\x64vanced_voice_options\x18\x08 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.AdvancedVoiceOptionsH\x00\x88\x01\x01\">\n\rTimepointType\x12\x1e\n\x1aTIMEPOINT_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tSSML_MARK\x10\x01\x42\x19\n\x17_advanced_voice_options\"\
|
13
|
+
descriptor_data = "\n1google/cloud/texttospeech/v1beta1/cloud_tts.proto\x12!google.cloud.texttospeech.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\"/\n\x11ListVoicesRequest\x12\x1a\n\rlanguage_code\x18\x01 \x01(\tB\x03\xe0\x41\x01\"N\n\x12ListVoicesResponse\x12\x38\n\x06voices\x18\x01 \x03(\x0b\x32(.google.cloud.texttospeech.v1beta1.Voice\"\x99\x01\n\x05Voice\x12\x16\n\x0elanguage_codes\x18\x01 \x03(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12G\n\x0bssml_gender\x18\x03 \x01(\x0e\x32\x32.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\x12!\n\x19natural_sample_rate_hertz\x18\x04 \x01(\x05\"d\n\x14\x41\x64vancedVoiceOptions\x12*\n\x1dlow_latency_journey_synthesis\x18\x01 \x01(\x08H\x00\x88\x01\x01\x42 \n\x1e_low_latency_journey_synthesis\"\x99\x04\n\x17SynthesizeSpeechRequest\x12\x45\n\x05input\x18\x01 \x01(\x0b\x32\x31.google.cloud.texttospeech.v1beta1.SynthesisInputB\x03\xe0\x41\x02\x12K\n\x05voice\x18\x02 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.VoiceSelectionParamsB\x03\xe0\x41\x02\x12I\n\x0c\x61udio_config\x18\x03 \x01(\x0b\x32..google.cloud.texttospeech.v1beta1.AudioConfigB\x03\xe0\x41\x02\x12\x66\n\x14\x65nable_time_pointing\x18\x04 \x03(\x0e\x32H.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest.TimepointType\x12\\\n\x16\x61\x64vanced_voice_options\x18\x08 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.AdvancedVoiceOptionsH\x00\x88\x01\x01\">\n\rTimepointType\x12\x1e\n\x1aTIMEPOINT_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tSSML_MARK\x10\x01\x42\x19\n\x17_advanced_voice_options\"\xa7\x03\n\x19\x43ustomPronunciationParams\x12\x13\n\x06phrase\x18\x01 \x01(\tH\x00\x88\x01\x01\x12m\n\x11phonetic_encoding\x18\x02 \x01(\x0e\x32M.google.cloud.texttospeech.v1beta1.CustomPronunciationParams.PhoneticEncodingH\x01\x88\x01\x01\x12\x1a\n\rpronunciation\x18\x03 
\x01(\tH\x02\x88\x01\x01\"\xb6\x01\n\x10PhoneticEncoding\x12!\n\x1dPHONETIC_ENCODING_UNSPECIFIED\x10\x00\x12\x19\n\x15PHONETIC_ENCODING_IPA\x10\x01\x12\x1d\n\x19PHONETIC_ENCODING_X_SAMPA\x10\x02\x12\'\n#PHONETIC_ENCODING_JAPANESE_YOMIGANA\x10\x03\x12\x1c\n\x18PHONETIC_ENCODING_PINYIN\x10\x04\x42\t\n\x07_phraseB\x14\n\x12_phonetic_encodingB\x10\n\x0e_pronunciation\"l\n\x14\x43ustomPronunciations\x12T\n\x0epronunciations\x18\x01 \x03(\x0b\x32<.google.cloud.texttospeech.v1beta1.CustomPronunciationParams\"\x95\x01\n\x12MultiSpeakerMarkup\x12N\n\x05turns\x18\x01 \x03(\x0b\x32:.google.cloud.texttospeech.v1beta1.MultiSpeakerMarkup.TurnB\x03\xe0\x41\x02\x1a/\n\x04Turn\x12\x14\n\x07speaker\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x11\n\x04text\x18\x02 \x01(\tB\x03\xe0\x41\x02\"\xa6\x02\n\x0eSynthesisInput\x12\x0e\n\x04text\x18\x01 \x01(\tH\x00\x12\x10\n\x06markup\x18\x05 \x01(\tH\x00\x12\x0e\n\x04ssml\x18\x02 \x01(\tH\x00\x12U\n\x14multi_speaker_markup\x18\x04 \x01(\x0b\x32\x35.google.cloud.texttospeech.v1beta1.MultiSpeakerMarkupH\x00\x12\x13\n\x06prompt\x18\x06 \x01(\tH\x01\x88\x01\x01\x12[\n\x15\x63ustom_pronunciations\x18\x03 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.CustomPronunciationsB\x03\xe0\x41\x01\x42\x0e\n\x0cinput_sourceB\t\n\x07_prompt\"\xbd\x02\n\x14VoiceSelectionParams\x12\x1a\n\rlanguage_code\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\x12G\n\x0bssml_gender\x18\x03 \x01(\x0e\x32\x32.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\x12J\n\x0c\x63ustom_voice\x18\x04 \x01(\x0b\x32\x34.google.cloud.texttospeech.v1beta1.CustomVoiceParams\x12M\n\x0bvoice_clone\x18\x05 \x01(\x0b\x32\x33.google.cloud.texttospeech.v1beta1.VoiceCloneParamsB\x03\xe0\x41\x01\x12\x17\n\nmodel_name\x18\x06 \x01(\tB\x03\xe0\x41\x01\"\xf6\x01\n\x0b\x41udioConfig\x12M\n\x0e\x61udio_encoding\x18\x01 \x01(\x0e\x32\x30.google.cloud.texttospeech.v1beta1.AudioEncodingB\x03\xe0\x41\x02\x12\x1d\n\rspeaking_rate\x18\x02 
\x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\x12\x15\n\x05pitch\x18\x03 \x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\x12\x1e\n\x0evolume_gain_db\x18\x04 \x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\x12\x1e\n\x11sample_rate_hertz\x18\x05 \x01(\x05\x42\x03\xe0\x41\x01\x12\"\n\x12\x65\x66\x66\x65\x63ts_profile_id\x18\x06 \x03(\tB\x06\xe0\x41\x04\xe0\x41\x01\"\xf6\x01\n\x11\x43ustomVoiceParams\x12\x32\n\x05model\x18\x01 \x01(\tB#\xe0\x41\x02\xfa\x41\x1d\n\x1b\x61utoml.googleapis.com/Model\x12\x61\n\x0ereported_usage\x18\x03 \x01(\x0e\x32\x42.google.cloud.texttospeech.v1beta1.CustomVoiceParams.ReportedUsageB\x05\x18\x01\xe0\x41\x01\"J\n\rReportedUsage\x12\x1e\n\x1aREPORTED_USAGE_UNSPECIFIED\x10\x00\x12\x0c\n\x08REALTIME\x10\x01\x12\x0b\n\x07OFFLINE\x10\x02\"2\n\x10VoiceCloneParams\x12\x1e\n\x11voice_cloning_key\x18\x01 \x01(\tB\x03\xe0\x41\x02\"\xb9\x01\n\x18SynthesizeSpeechResponse\x12\x15\n\raudio_content\x18\x01 \x01(\x0c\x12@\n\ntimepoints\x18\x02 \x03(\x0b\x32,.google.cloud.texttospeech.v1beta1.Timepoint\x12\x44\n\x0c\x61udio_config\x18\x04 \x01(\x0b\x32..google.cloud.texttospeech.v1beta1.AudioConfig\"4\n\tTimepoint\x12\x11\n\tmark_name\x18\x04 \x01(\t\x12\x14\n\x0ctime_seconds\x18\x03 \x01(\x01\"\xa4\x01\n\x14StreamingAudioConfig\x12M\n\x0e\x61udio_encoding\x18\x01 \x01(\x0e\x32\x30.google.cloud.texttospeech.v1beta1.AudioEncodingB\x03\xe0\x41\x02\x12\x1e\n\x11sample_rate_hertz\x18\x02 \x01(\x05\x42\x03\xe0\x41\x01\x12\x1d\n\rspeaking_rate\x18\x03 \x01(\x01\x42\x06\xe0\x41\x04\xe0\x41\x01\"\xa3\x02\n\x19StreamingSynthesizeConfig\x12K\n\x05voice\x18\x01 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.VoiceSelectionParamsB\x03\xe0\x41\x02\x12\\\n\x16streaming_audio_config\x18\x04 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.StreamingAudioConfigB\x03\xe0\x41\x01\x12[\n\x15\x63ustom_pronunciations\x18\x05 \x01(\x0b\x32\x37.google.cloud.texttospeech.v1beta1.CustomPronunciationsB\x03\xe0\x41\x01\"k\n\x17StreamingSynthesisInput\x12\x0e\n\x04text\x18\x01 
\x01(\tH\x00\x12\x10\n\x06markup\x18\x05 \x01(\tH\x00\x12\x13\n\x06prompt\x18\x06 \x01(\tH\x01\x88\x01\x01\x42\x0e\n\x0cinput_sourceB\t\n\x07_prompt\"\xd8\x01\n\x1aStreamingSynthesizeRequest\x12X\n\x10streaming_config\x18\x01 \x01(\x0b\x32<.google.cloud.texttospeech.v1beta1.StreamingSynthesizeConfigH\x00\x12K\n\x05input\x18\x02 \x01(\x0b\x32:.google.cloud.texttospeech.v1beta1.StreamingSynthesisInputH\x00\x42\x13\n\x11streaming_request\"4\n\x1bStreamingSynthesizeResponse\x12\x15\n\raudio_content\x18\x01 \x01(\x0c*W\n\x0fSsmlVoiceGender\x12!\n\x1dSSML_VOICE_GENDER_UNSPECIFIED\x10\x00\x12\x08\n\x04MALE\x10\x01\x12\n\n\x06\x46\x45MALE\x10\x02\x12\x0b\n\x07NEUTRAL\x10\x03*\x8c\x01\n\rAudioEncoding\x12\x1e\n\x1a\x41UDIO_ENCODING_UNSPECIFIED\x10\x00\x12\x0c\n\x08LINEAR16\x10\x01\x12\x07\n\x03MP3\x10\x02\x12\x0f\n\x0bMP3_64_KBPS\x10\x04\x12\x0c\n\x08OGG_OPUS\x10\x03\x12\t\n\x05MULAW\x10\x05\x12\x08\n\x04\x41LAW\x10\x06\x12\x07\n\x03PCM\x10\x07\x12\x07\n\x03M4A\x10\x08\x32\xef\x04\n\x0cTextToSpeech\x12\xa2\x01\n\nListVoices\x12\x34.google.cloud.texttospeech.v1beta1.ListVoicesRequest\x1a\x35.google.cloud.texttospeech.v1beta1.ListVoicesResponse\"\'\xda\x41\rlanguage_code\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1beta1/voices\x12\xcb\x01\n\x10SynthesizeSpeech\x12:.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest\x1a;.google.cloud.texttospeech.v1beta1.SynthesizeSpeechResponse\">\xda\x41\x18input,voice,audio_config\x82\xd3\xe4\x93\x02\x1d\"\x18/v1beta1/text:synthesize:\x01*\x12\x9a\x01\n\x13StreamingSynthesize\x12=.google.cloud.texttospeech.v1beta1.StreamingSynthesizeRequest\x1a>.google.cloud.texttospeech.v1beta1.StreamingSynthesizeResponse\"\x00(\x01\x30\x01\x1aO\xca\x41\x1btexttospeech.googleapis.com\xd2\x41.https://www.googleapis.com/auth/cloud-platformB\xd5\x02\n%com.google.cloud.texttospeech.v1beta1B\x11TextToSpeechProtoP\x01ZIcloud.google.com/go/texttospeech/apiv1beta1/texttospeechpb;texttospeechpb\xa2\x02\x04\x43TTS\xaa\x02!Google.Cloud.TextToSpeech.V1Beta1\xca\x02!Google
\\Cloud\\TextToSpeech\\V1beta1\xea\x02$Google::Cloud::TextToSpeech::V1beta1\xea\x41U\n\x1b\x61utoml.googleapis.com/Model\x12\x36projects/{project}/locations/{location}/models/{model}b\x06proto3"
|
14
14
|
|
15
15
|
pool = Google::Protobuf::DescriptorPool.generated_pool
|
16
16
|
|
@@ -38,7 +38,7 @@ module Google
|
|
38
38
|
# Synthesizes speech synchronously: receive results after all text input
|
39
39
|
# has been processed.
|
40
40
|
rpc :SynthesizeSpeech, ::Google::Cloud::TextToSpeech::V1beta1::SynthesizeSpeechRequest, ::Google::Cloud::TextToSpeech::V1beta1::SynthesizeSpeechResponse
|
41
|
-
# Performs bidirectional streaming speech synthesis:
|
41
|
+
# Performs bidirectional streaming speech synthesis: receives audio while
|
42
42
|
# sending text.
|
43
43
|
rpc :StreamingSynthesize, stream(::Google::Cloud::TextToSpeech::V1beta1::StreamingSynthesizeRequest), stream(::Google::Cloud::TextToSpeech::V1beta1::StreamingSynthesizeResponse)
|
44
44
|
end
|
@@ -69,8 +69,8 @@ module Google
|
|
69
69
|
# Used for advanced voice options.
|
70
70
|
# @!attribute [rw] low_latency_journey_synthesis
|
71
71
|
# @return [::Boolean]
|
72
|
-
# Only for Journey voices. If false, the synthesis
|
73
|
-
# and
|
72
|
+
# Only for Journey voices. If false, the synthesis is context aware
|
73
|
+
# and has a higher latency.
|
74
74
|
class AdvancedVoiceOptions
|
75
75
|
include ::Google::Protobuf::MessageExts
|
76
76
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -109,9 +109,9 @@ module Google
|
|
109
109
|
# Pronunciation customization for a phrase.
|
110
110
|
# @!attribute [rw] phrase
|
111
111
|
# @return [::String]
|
112
|
-
# The phrase to which the customization
|
113
|
-
# The phrase can be multiple words
|
114
|
-
#
|
112
|
+
# The phrase to which the customization is applied.
|
113
|
+
# The phrase can be multiple words, such as proper nouns, but shouldn't span
|
114
|
+
# the length of the sentence.
|
115
115
|
# @!attribute [rw] phonetic_encoding
|
116
116
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciationParams::PhoneticEncoding]
|
117
117
|
# The phonetic encoding of the phrase.
|
@@ -128,20 +128,43 @@ module Google
|
|
128
128
|
# Not specified.
|
129
129
|
PHONETIC_ENCODING_UNSPECIFIED = 0
|
130
130
|
|
131
|
-
# IPA
|
131
|
+
# IPA, such as apple -> ˈæpəl.
|
132
132
|
# https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
|
133
133
|
PHONETIC_ENCODING_IPA = 1
|
134
134
|
|
135
|
-
# X-SAMPA
|
135
|
+
# X-SAMPA, such as apple -> "{p@l".
|
136
136
|
# https://en.wikipedia.org/wiki/X-SAMPA
|
137
137
|
PHONETIC_ENCODING_X_SAMPA = 2
|
138
|
+
|
139
|
+
# For reading-to-pron conversion to work well, the `pronunciation` field
|
140
|
+
# should only contain Kanji, Hiragana, and Katakana.
|
141
|
+
#
|
142
|
+
# The pronunciation can also contain pitch accents.
|
143
|
+
# The start of a pitch phrase is specified with `^` and the down-pitch
|
144
|
+
# position is specified with `!`, for example:
|
145
|
+
#
|
146
|
+
# phrase:端 pronunciation:^はし
|
147
|
+
# phrase:箸 pronunciation:^は!し
|
148
|
+
# phrase:橋 pronunciation:^はし!
|
149
|
+
#
|
150
|
+
# We currently only support the Tokyo dialect, which allows at most one
|
151
|
+
# down-pitch per phrase (i.e. at most one `!` between `^`).
|
152
|
+
PHONETIC_ENCODING_JAPANESE_YOMIGANA = 3
|
153
|
+
|
154
|
+
# Used to specify pronunciations for Mandarin words. See
|
155
|
+
# https://en.wikipedia.org/wiki/Pinyin.
|
156
|
+
#
|
157
|
+
# For example: 朝阳, the pronunciation is "chao2 yang2". The number
|
158
|
+
# represents the tone, and there is a space between syllables. Neutral
|
159
|
+
# tones are represented by 5, for example 孩子 "hai2 zi5".
|
160
|
+
PHONETIC_ENCODING_PINYIN = 4
|
138
161
|
end
|
139
162
|
end
|
140
163
|
|
141
164
|
# A collection of pronunciation customizations.
|
142
165
|
# @!attribute [rw] pronunciations
|
143
166
|
# @return [::Array<::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciationParams>]
|
144
|
-
# The pronunciation customizations
|
167
|
+
# The pronunciation customizations are applied.
|
145
168
|
class CustomPronunciations
|
146
169
|
include ::Google::Protobuf::MessageExts
|
147
170
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -155,7 +178,7 @@ module Google
|
|
155
178
|
include ::Google::Protobuf::MessageExts
|
156
179
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
157
180
|
|
158
|
-
# A
|
181
|
+
# A multi-speaker turn.
|
159
182
|
# @!attribute [rw] speaker
|
160
183
|
# @return [::String]
|
161
184
|
# Required. The speaker of the turn, for example, 'O' or 'Q'. Please refer
|
@@ -177,7 +200,13 @@ module Google
|
|
177
200
|
# @return [::String]
|
178
201
|
# The raw text to be synthesized.
|
179
202
|
#
|
180
|
-
# Note: The following fields are mutually exclusive: `text`, `ssml`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
203
|
+
# Note: The following fields are mutually exclusive: `text`, `markup`, `ssml`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
204
|
+
# @!attribute [rw] markup
|
205
|
+
# @return [::String]
|
206
|
+
# Markup for HD voices specifically. This field may not be used with any
|
207
|
+
# other voices.
|
208
|
+
#
|
209
|
+
# Note: The following fields are mutually exclusive: `markup`, `text`, `ssml`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
181
210
|
# @!attribute [rw] ssml
|
182
211
|
# @return [::String]
|
183
212
|
# The SSML document to be synthesized. The SSML document must be valid
|
@@ -186,27 +215,31 @@ module Google
|
|
186
215
|
# more information, see
|
187
216
|
# [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
|
188
217
|
#
|
189
|
-
# Note: The following fields are mutually exclusive: `ssml`, `text`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
218
|
+
# Note: The following fields are mutually exclusive: `ssml`, `text`, `markup`, `multi_speaker_markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
190
219
|
# @!attribute [rw] multi_speaker_markup
|
191
220
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::MultiSpeakerMarkup]
|
192
221
|
# The multi-speaker input to be synthesized. Only applicable for
|
193
222
|
# multi-speaker synthesis.
|
194
223
|
#
|
195
|
-
# Note: The following fields are mutually exclusive: `multi_speaker_markup`, `text`, `ssml`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
224
|
+
# Note: The following fields are mutually exclusive: `multi_speaker_markup`, `text`, `markup`, `ssml`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
225
|
+
# @!attribute [rw] prompt
|
226
|
+
# @return [::String]
|
227
|
+
# This system instruction is supported only for controllable/promptable voice
|
228
|
+
# models. If this system instruction is used, we pass the unedited text to
|
229
|
+
# Gemini-TTS. Otherwise, a default system instruction is used. AI Studio
|
230
|
+
# calls this system instruction, Style Instructions.
|
196
231
|
# @!attribute [rw] custom_pronunciations
|
197
232
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciations]
|
198
|
-
# Optional. The pronunciation customizations
|
199
|
-
# this is set, the input
|
233
|
+
# Optional. The pronunciation customizations are applied to the input. If
|
234
|
+
# this is set, the input is synthesized using the given pronunciation
|
200
235
|
# customizations.
|
201
236
|
#
|
202
|
-
# The initial support
|
203
|
-
#
|
204
|
-
# VoiceSelectionParams. Journey and Instant Clone voices are
|
205
|
-
# not supported yet.
|
237
|
+
# The initial support is for en-us, with plans to expand to other locales in
|
238
|
+
# the future. Instant Clone voices aren't supported.
|
206
239
|
#
|
207
240
|
# In order to customize the pronunciation of a phrase, there must be an exact
|
208
241
|
# match of the phrase in the input types. If using SSML, the phrase must not
|
209
|
-
# be inside a phoneme tag
|
242
|
+
# be inside a phoneme tag.
|
210
243
|
class SynthesisInput
|
211
244
|
include ::Google::Protobuf::MessageExts
|
212
245
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -246,8 +279,12 @@ module Google
|
|
246
279
|
# @!attribute [rw] voice_clone
|
247
280
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::VoiceCloneParams]
|
248
281
|
# Optional. The configuration for a voice clone. If
|
249
|
-
# [VoiceCloneParams.voice_clone_key] is set, the service
|
250
|
-
#
|
282
|
+
# [VoiceCloneParams.voice_cloning_key] is set, the service chooses the voice
|
283
|
+
# clone matching the specified configuration.
|
284
|
+
# @!attribute [rw] model_name
|
285
|
+
# @return [::String]
|
286
|
+
# Optional. The name of the model. If set, the service will choose the model
|
287
|
+
# matching the specified configuration.
|
251
288
|
class VoiceSelectionParams
|
252
289
|
include ::Google::Protobuf::MessageExts
|
253
290
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -259,10 +296,10 @@ module Google
|
|
259
296
|
# Required. The format of the audio byte stream.
|
260
297
|
# @!attribute [rw] speaking_rate
|
261
298
|
# @return [::Float]
|
262
|
-
# Optional. Input only. Speaking rate/speed, in the range [0.25,
|
299
|
+
# Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
|
263
300
|
# the normal native speed supported by the specific voice. 2.0 is twice as
|
264
301
|
# fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
|
265
|
-
# speed. Any other values < 0.25 or >
|
302
|
+
# speed. Any other values < 0.25 or > 2.0 will return an error.
|
266
303
|
# @!attribute [rw] pitch
|
267
304
|
# @return [::Float]
|
268
305
|
# Optional. Input only. Speaking pitch, in the range [-20.0, 20.0]. 20 means
|
@@ -375,11 +412,17 @@ module Google
|
|
375
412
|
# @!attribute [rw] audio_encoding
|
376
413
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::AudioEncoding]
|
377
414
|
# Required. The format of the audio byte stream.
|
378
|
-
#
|
379
|
-
#
|
415
|
+
# Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
|
416
|
+
# return an error.
|
380
417
|
# @!attribute [rw] sample_rate_hertz
|
381
418
|
# @return [::Integer]
|
382
419
|
# Optional. The synthesis sample rate (in hertz) for this audio.
|
420
|
+
# @!attribute [rw] speaking_rate
|
421
|
+
# @return [::Float]
|
422
|
+
# Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
|
423
|
+
# the normal native speed supported by the specific voice. 2.0 is twice as
|
424
|
+
# fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
|
425
|
+
# speed. Any other values < 0.25 or > 2.0 will return an error.
|
383
426
|
class StreamingAudioConfig
|
384
427
|
include ::Google::Protobuf::MessageExts
|
385
428
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -392,6 +435,18 @@ module Google
|
|
392
435
|
# @!attribute [rw] streaming_audio_config
|
393
436
|
# @return [::Google::Cloud::TextToSpeech::V1beta1::StreamingAudioConfig]
|
394
437
|
# Optional. The configuration of the synthesized audio.
|
438
|
+
# @!attribute [rw] custom_pronunciations
|
439
|
+
# @return [::Google::Cloud::TextToSpeech::V1beta1::CustomPronunciations]
|
440
|
+
# Optional. The pronunciation customizations are applied to the input. If
|
441
|
+
# this is set, the input is synthesized using the given pronunciation
|
442
|
+
# customizations.
|
443
|
+
#
|
444
|
+
# The initial support is for en-us, with plans to expand to other locales in
|
445
|
+
# the future. Instant Clone voices aren't supported.
|
446
|
+
#
|
447
|
+
# In order to customize the pronunciation of a phrase, there must be an exact
|
448
|
+
# match of the phrase in the input types. If using SSML, the phrase must not
|
449
|
+
# be inside a phoneme tag.
|
395
450
|
class StreamingSynthesizeConfig
|
396
451
|
include ::Google::Protobuf::MessageExts
|
397
452
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -401,9 +456,19 @@ module Google
|
|
401
456
|
# @!attribute [rw] text
|
402
457
|
# @return [::String]
|
403
458
|
# The raw text to be synthesized. It is recommended that each input
|
404
|
-
# contains complete, terminating sentences,
|
405
|
-
#
|
406
|
-
#
|
459
|
+
# contains complete, terminating sentences, which results in better prosody
|
460
|
+
# in the output audio.
|
461
|
+
#
|
462
|
+
# Note: The following fields are mutually exclusive: `text`, `markup`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
463
|
+
# @!attribute [rw] markup
|
464
|
+
# @return [::String]
|
465
|
+
# Markup for HD voices specifically. This field may not be used with any
|
466
|
+
# other voices.
|
467
|
+
#
|
468
|
+
# Note: The following fields are mutually exclusive: `markup`, `text`. If a field in that set is populated, all other fields in the set will automatically be cleared.
|
469
|
+
# @!attribute [rw] prompt
|
470
|
+
# @return [::String]
|
471
|
+
# This is a system instruction supported only for controllable voice models.
|
407
472
|
class StreamingSynthesisInput
|
408
473
|
include ::Google::Protobuf::MessageExts
|
409
474
|
extend ::Google::Protobuf::MessageExts::ClassMethods
|
@@ -466,7 +531,8 @@ module Google
|
|
466
531
|
# Configuration to set up audio encoder. The encoding determines the output
|
467
532
|
# audio format that we'd like.
|
468
533
|
module AudioEncoding
|
469
|
-
# Not specified.
|
534
|
+
# Not specified. Only used by GenerateVoiceCloningKey. Otherwise, will return
|
535
|
+
# result
|
470
536
|
# [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
471
537
|
AUDIO_ENCODING_UNSPECIFIED = 0
|
472
538
|
|
@@ -480,7 +546,7 @@ module Google
|
|
480
546
|
# MP3 at 64kbps.
|
481
547
|
MP3_64_KBPS = 4
|
482
548
|
|
483
|
-
# Opus encoded audio wrapped in an ogg container. The result
|
549
|
+
# Opus encoded audio wrapped in an ogg container. The result is a
|
484
550
|
# file which can be played natively on Android, and in browsers (at least
|
485
551
|
# Chrome and Firefox). The quality of the encoding is considerably higher
|
486
552
|
# than MP3 while using approximately the same bitrate.
|
@@ -495,9 +561,12 @@ module Google
|
|
495
561
|
ALAW = 6
|
496
562
|
|
497
563
|
# Uncompressed 16-bit signed little-endian samples (Linear PCM).
|
498
|
-
# Note that as opposed to LINEAR16, audio
|
564
|
+
# Note that as opposed to LINEAR16, audio won't be wrapped in a WAV (or
|
499
565
|
# any other) header.
|
500
566
|
PCM = 7
|
567
|
+
|
568
|
+
# M4A audio.
|
569
|
+
M4A = 8
|
501
570
|
end
|
502
571
|
end
|
503
572
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-cloud-text_to_speech-v1beta1
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.21.0
|
4
|
+
version: 0.23.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Google LLC
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: '1.
|
18
|
+
version: '1.1'
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - "~>"
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: '1.
|
25
|
+
version: '1.1'
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: google-cloud-errors
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
106
|
- !ruby/object:Gem::Version
|
107
107
|
version: '0'
|
108
108
|
requirements: []
|
109
|
-
rubygems_version: 3.6.
|
109
|
+
rubygems_version: 3.6.9
|
110
110
|
specification_version: 4
|
111
111
|
summary: Synthesizes natural-sounding speech by applying powerful neural network models.
|
112
112
|
test_files: []
|