rb-edge-tts 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +17 -0
- data/LICENSE +195 -0
- data/README.md +257 -0
- data/Rakefile +8 -0
- data/examples/async_audio_gen_with_dynamic_voice_selection.rb +26 -0
- data/examples/async_audio_gen_with_predefined_voice.rb +13 -0
- data/examples/async_audio_gen_with_predefined_voice_async.rb +16 -0
- data/examples/async_audio_streaming_with_subtitles.rb +30 -0
- data/exe/rb-edge-playback +168 -0
- data/exe/rb-edge-tts +192 -0
- data/lib/edge_playback/version.rb +5 -0
- data/lib/rb_edge_tts/communicate.rb +336 -0
- data/lib/rb_edge_tts/constants.rb +38 -0
- data/lib/rb_edge_tts/drm.rb +82 -0
- data/lib/rb_edge_tts/exceptions.rb +10 -0
- data/lib/rb_edge_tts/srt_composer.rb +99 -0
- data/lib/rb_edge_tts/submaker.rb +39 -0
- data/lib/rb_edge_tts/typing.rb +93 -0
- data/lib/rb_edge_tts/util.rb +124 -0
- data/lib/rb_edge_tts/version.rb +6 -0
- data/lib/rb_edge_tts/voices_manager.rb +100 -0
- data/lib/rb_edge_tts.rb +19 -0
- data/lib/rb_edge_tts_simple.rb +24 -0
- data/rb-edge-tts.gemspec +42 -0
- metadata +196 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Endpoint URLs, default voice and HTTP header sets used when talking to the
# Microsoft Edge read-aloud speech service.
module RbEdgeTTS
  # Host and path shared by every endpoint below (no URL scheme).
  BASE_URL = 'speech.platform.bing.com/consumer/speech/synthesize/readaloud'
  # Token appended to every request URL and hashed into the Sec-MS-GEC value.
  TRUSTED_CLIENT_TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'

  # WebSocket endpoint.
  WSS_URL = "wss://#{BASE_URL}/edge/v1?TrustedClientToken=#{TRUSTED_CLIENT_TOKEN}"
  # HTTPS endpoint that returns the voice catalogue.
  VOICE_LIST = "https://#{BASE_URL}/voices/list?trustedclienttoken=#{TRUSTED_CLIENT_TOKEN}"

  DEFAULT_VOICE = 'en-US-EmmaMultilingualNeural'

  # Browser version advertised in the headers; the major version is everything
  # before the first dot.
  CHROMIUM_FULL_VERSION = '143.0.3650.75'
  CHROMIUM_MAJOR_VERSION = CHROMIUM_FULL_VERSION.partition('.').first
  SEC_MS_GEC_VERSION = "1-#{CHROMIUM_FULL_VERSION}"

  # Headers common to the WebSocket and the voice-list requests.
  BASE_HEADERS = {
    'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' \
                    "Chrome/#{CHROMIUM_MAJOR_VERSION}.0.0.0 Safari/537.36 Edg/#{CHROMIUM_MAJOR_VERSION}.0.0.0",
    'Accept-Encoding' => 'gzip, deflate, br, zstd',
    'Accept-Language' => 'en-US,en;q=0.9'
  }.freeze

  # BASE_HEADERS plus the headers sent on the WebSocket handshake.
  WSS_HEADERS = {
    **BASE_HEADERS,
    'Pragma' => 'no-cache',
    'Cache-Control' => 'no-cache',
    'Origin' => 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
    'Sec-WebSocket-Version' => '13'
  }.freeze

  # BASE_HEADERS plus the headers sent with the voice-list request.
  VOICE_HEADERS = {
    **BASE_HEADERS,
    'Authority' => 'speech.platform.bing.com',
    'Sec-CH-UA' => %(" Not;A Brand";v="99", "Microsoft Edge";v="#{CHROMIUM_MAJOR_VERSION}", ) +
                   %("Chromium";v="#{CHROMIUM_MAJOR_VERSION}"),
    'Sec-CH-UA-Mobile' => '?0',
    'Accept' => '*/*',
    'Sec-Fetch-Site' => 'none',
    'Sec-Fetch-Mode' => 'cors',
    'Sec-Fetch-Dest' => 'empty'
  }.freeze
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'securerandom'
|
|
5
|
+
require 'time'
|
|
6
|
+
|
|
7
|
+
module RbEdgeTTS
|
|
8
|
+
# Request-signing helpers for the speech service: clock-skew tracking, the
# Sec-MS-GEC token, the speech.config message and the MUID cookie.
#
# All methods are class-level; the only state is the accumulated clock skew.
class DRM
  # Seconds added to the local clock when computing timestamps.
  @clock_skew_seconds = 0.0

  # Offset added to a Unix timestamp before it is converted to ticks.
  WIN_EPOCH = 11_644_473_600
  # Nanoseconds per second; divided by 100 to obtain 100-nanosecond ticks.
  S_TO_NS = 1_000_000_000

  class << self
    attr_accessor :clock_skew_seconds

    # Adds +skew_seconds+ (may be negative) to the stored clock skew.
    #
    # @param skew_seconds [Numeric]
    # @return [Numeric] the new accumulated skew
    def adj_clock_skew_seconds(skew_seconds)
      @clock_skew_seconds += skew_seconds
    end

    # @return [Float] current UTC Unix time corrected by the stored skew
    def get_unix_timestamp
      Time.now.utc.to_f + @clock_skew_seconds
    end

    # Parses an HTTP +Date+ header value.
    #
    # RFC 2616 permits three formats (RFC 1123, RFC 850 and asctime), all of
    # which Time.httpdate understands. Time.rfc2822 is kept as a fallback so
    # values it accepts (e.g. numeric zones) still parse.
    #
    # @param date_string [String, nil]
    # @return [Float, nil] Unix timestamp, or nil when the value cannot be parsed
    def parse_rfc2616_date(date_string)
      return nil unless date_string.is_a?(String)

      parsed = begin
        Time.httpdate(date_string)
      rescue ArgumentError
        Time.rfc2822(date_string)
      end
      parsed.to_f
    rescue ArgumentError
      nil
    end

    # Re-synchronises the stored clock skew from the +Date+ header carried by
    # a failed response.
    #
    # @param error [#headers] object exposing the response headers as a hash-like value
    # @raise [SkewAdjustmentError] when the header is missing or unparseable
    # @return [Numeric] the new accumulated skew
    def handle_client_response_error(error)
      raise SkewAdjustmentError, 'No server date in headers.' unless error.headers

      server_date = error.headers['Date'] || error.headers['date']
      raise SkewAdjustmentError, 'No server date in headers.' unless server_date

      server_date_parsed = parse_rfc2616_date(server_date)
      raise SkewAdjustmentError, "Failed to parse server date: #{server_date}" unless server_date_parsed

      client_date = get_unix_timestamp
      adj_clock_skew_seconds(server_date_parsed - client_date)
    end

    # Builds the Sec-MS-GEC token: the upper-case SHA-256 hex digest of the
    # current time (shifted by WIN_EPOCH, rounded down to a 5-minute boundary
    # and expressed in 100 ns ticks) followed by the trusted client token.
    #
    # Integer arithmetic is used throughout so the tick value never depends
    # on floating-point rounding.
    #
    # @return [String] 64 upper-case hexadecimal characters
    def generate_sec_ms_gec
      seconds = (get_unix_timestamp + WIN_EPOCH).floor
      seconds -= seconds % 300
      ticks = seconds * (S_TO_NS / 100)

      Digest::SHA256.hexdigest("#{ticks}#{TRUSTED_CLIENT_TOKEN}").upcase
    end

    # Builds the +speech.config+ message.
    #
    # Word-boundary metadata is enabled when +boundary+ is 'WordBoundary';
    # for any other value sentence-boundary metadata is enabled instead.
    #
    # @param boundary [String]
    # @return [String] headers and JSON body separated by a blank CRLF line
    def command_request(boundary)
      word_boundary = boundary == 'WordBoundary'
      wd = word_boundary ? 'true' : 'false'
      sq = word_boundary ? 'false' : 'true'

      "X-Timestamp:#{Util.date_to_string}\r\n" \
        "Content-Type:application/json; charset=utf-8\r\n" \
        "Path:speech.config\r\n\r\n" \
        '{"context":{"synthesis":{"audio":{"metadataoptions":{' \
        "\"sentenceBoundaryEnabled\":\"#{sq}\",\"wordBoundaryEnabled\":\"#{wd}\"" \
        '},' \
        '"outputFormat":"audio-24khz-48kbitrate-mono-mp3"' \
        '}}}}'
    end

    # @return [String] 32 random upper-case hexadecimal characters
    def generate_muid
      SecureRandom.hex(16).upcase
    end

    # Returns a copy of +headers+ with a freshly generated MUID cookie added.
    # The input hash is not modified (it may be frozen).
    #
    # @param headers [Hash{String => String}]
    # @raise [ArgumentError] if +headers+ already has a 'Cookie' key
    # @return [Hash{String => String}]
    def headers_with_muid(headers)
      raise ArgumentError, 'Headers already contain Cookie' if headers.key?('Cookie')

      headers.merge('Cookie' => "muid=#{generate_muid};")
    end
  end
end
|
|
82
|
+
end
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Exception classes for the gem. Everything derives from EdgeTTSException,
# so callers can rescue that single class to catch any of them.
module RbEdgeTTS
  # Common ancestor of every exception declared here.
  class EdgeTTSException < StandardError
  end

  class UnknownResponse < EdgeTTSException
  end

  class UnexpectedResponse < EdgeTTSException
  end

  class NoAudioReceived < EdgeTTSException
  end

  class WebSocketError < EdgeTTSException
  end

  class SkewAdjustmentError < EdgeTTSException
  end
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RbEdgeTTS
|
|
4
|
+
# One subtitle cue: a sequence number, start and end times in seconds, and
# the text to display.
#
# Cues order by (start, end, index) and compare equal when all four
# attributes are equal.
class Subtitle
  attr_reader :index, :start, :end, :content

  # @param index [Integer] sequence number written to the SRT block
  # @param start_time [Numeric] start time in seconds
  # @param end_time [Numeric] end time in seconds
  # @param content [String] cue text
  def initialize(index, start_time, end_time, content)
    @index = index
    @start = start_time
    @end = end_time
    @content = content
  end

  # Renders the cue as one SRT block, terminated by a blank line.
  #
  # @param eol [String] line terminator to use instead of "\n"
  # @return [String]
  def to_srt(eol = "\n")
    body = make_legal_content(content)
    body = body.gsub("\n", eol) if eol != "\n"

    "#{index}#{eol}#{timedelta_to_srt_timestamp(start)} --> #{timedelta_to_srt_timestamp(@end)}#{eol}" \
      "#{body}#{eol}#{eol}"
  end

  # Orders cues by start time, then end time, then index.
  def <=>(other)
    [start, @end, index] <=> [other.start, other.end, other.index]
  end

  # Value equality on all four attributes.
  def ==(other)
    other.instance_of?(self.class) &&
      [index, start, @end, content] == [other.index, other.start, other.end, other.content]
  end

  # Strict equality used for Hash keys; kept consistent with #hash by
  # comparing the attributes with Array#eql?.
  def eql?(other)
    other.instance_of?(self.class) &&
      [index, start, @end, content].eql?([other.index, other.start, other.end, other.content])
  end

  def hash
    [index, start, @end, content].hash
  end

  private

  # Collapses blank lines inside the text, since a blank line would end the
  # SRT block early. Text without a leading newline or blank line is
  # returned untouched.
  def make_legal_content(content_str)
    return content_str unless content_str.start_with?("\n") || content_str.include?("\n\n")

    content_str.strip.split(/\n\n+/).join("\n")
  end

  # Formats a duration in seconds as HH:MM:SS,mmm.
  #
  # The value is first rounded to whole microseconds and then truncated to
  # milliseconds, so float noise such as 1.001 * 1000 == 1000.9999999999999
  # cannot drop a millisecond. All fields derive from the same integer.
  def timedelta_to_srt_timestamp(time_delta)
    total_milliseconds = (time_delta * 1_000_000).round / 1000
    total_seconds, milliseconds = total_milliseconds.divmod(1000)
    hours, remainder = total_seconds.divmod(3600)
    minutes, seconds = remainder.divmod(60)

    format('%02d:%02d:%02d,%03d', hours, minutes, seconds, milliseconds)
  end
end
|
|
49
|
+
|
|
50
|
+
# Turns a collection of Subtitle cues into SRT text.
module SRTComposer
  SECONDS_IN_HOUR = 3600
  SECONDS_IN_MINUTE = 60

  # Renders +subtitles+ as one SRT document.
  #
  # @param subtitles [Enumerable<Subtitle>]
  # @param reindex [Boolean] when true, cues are sorted, filtered and
  #   renumbered first; when false they are rendered exactly as given
  # @param start_index [Integer] number given to the first cue when reindexing
  # @param eol [String] line terminator passed to Subtitle#to_srt
  # @return [String]
  def self.compose(subtitles, reindex: true, start_index: 1, eol: "\n")
    # Written out in full: the `start_index:` shorthand needs Ruby 3.1, which
    # is newer than the gem's declared minimum of 3.0.
    cues = reindex ? sort_and_reindex(subtitles, start_index: start_index) : subtitles
    cues.map { |cue| cue.to_srt(eol) }.join
  end

  # Sorts the cues, drops those that are blank, start before zero or do not
  # end after they start, and numbers the survivors consecutively.
  #
  # @param subtitles [Enumerable<Subtitle>]
  # @param start_index [Integer]
  # @return [Array<Subtitle>] new Subtitle objects; the inputs are not modified
  def self.sort_and_reindex(subtitles, start_index: 1)
    kept = subtitles.sort.select { |cue| should_include_subtitle?(cue) }
    kept.each_with_index.map do |cue, position|
      Subtitle.new(start_index + position, cue.start, cue.end, cue.content)
    end
  end

  class << self
    private

    def should_include_subtitle?(subtitle)
      !(content_empty?(subtitle) || negative_start_time?(subtitle) || invalid_time_range?(subtitle))
    end

    def content_empty?(subtitle)
      subtitle.content.strip.empty?
    end

    def negative_start_time?(subtitle)
      subtitle.start.negative?
    end

    def invalid_time_range?(subtitle)
      subtitle.start >= subtitle.end
    end
  end
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'srt_composer'
|
|
4
|
+
|
|
5
|
+
module RbEdgeTTS
|
|
6
|
+
# Collects boundary messages from a synthesis stream and renders them as SRT.
#
# The first message fed in fixes the boundary type; every later message must
# carry the same type.
class SubMaker
  BOUNDARY_TYPES = %w[WordBoundary SentenceBoundary].freeze
  private_constant :BOUNDARY_TYPES

  attr_reader :cues, :type

  def initialize
    @type = nil
    @cues = []
  end

  # Appends one cue built from +msg+.
  #
  # offset and duration are divided by 10_000_000.0 to obtain seconds.
  #
  # @param msg [TTSChunk] a 'WordBoundary' or 'SentenceBoundary' chunk
  # @raise [ArgumentError] if +msg+ is not a TTSChunk, has another type, or
  #   its type differs from the one already recorded
  # @return [Array<Subtitle>] the cue list after the append
  def feed(msg)
    raise ArgumentError, 'msg must be a TTSChunk' unless msg.is_a?(TTSChunk)

    unless BOUNDARY_TYPES.include?(msg.type)
      raise ArgumentError, "Invalid message type, expected 'WordBoundary' or 'SentenceBoundary'."
    end

    @type ||= msg.type
    unless @type == msg.type
      raise ArgumentError, "Expected message type '#{@type}', but got '#{msg.type}'."
    end

    ticks_per_second = 10_000_000.0
    cue_start = msg.offset / ticks_per_second
    cue_end = (msg.offset + msg.duration) / ticks_per_second

    @cues << Subtitle.new(@cues.size + 1, cue_start, cue_end, msg.text)
  end

  # @return [String] the collected cues rendered through SRTComposer.compose
  def get_srt
    SRTComposer.compose(@cues)
  end

  alias to_srt get_srt

  def to_s
    get_srt
  end
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RbEdgeTTS
|
|
4
|
+
# Keyword-initialised value structs shared across the library.

# One item of a synthesis stream. SubMaker reads type, offset, duration and
# text from it; offset and duration are in units of 1/10_000_000 second.
TTSChunk = Struct.new(*%i[type data offset duration text], keyword_init: true)

# Tag lists attached to a voice.
VoiceTag = Struct.new(*%i[content_categories voice_personalities], keyword_init: true)

# One entry of the voice catalogue.
Voice = Struct.new(
  *%i[name short_name gender locale suggested_codec friendly_name status voice_tag],
  keyword_init: true
)

# A Voice plus the language part of its locale, as stored by VoicesManager.
VoicesManagerVoice = Struct.new(*Voice.members, :language, keyword_init: true)

# Mutable bookkeeping record for a synthesis session.
CommunicateState = Struct.new(
  *%i[partial_text offset_compensation last_duration_offset stream_was_called],
  keyword_init: true
)
|
|
40
|
+
|
|
41
|
+
# Validated synthesis settings.
#
# The values end up interpolated into SSML attributes, so every pattern is
# anchored with \A and \z: with ^ and $ a multi-line value such as
# "+0%\n<evil/>" would pass because a single line of it matches.
class TTSConfig
  PERCENT_PATTERN = /\A[+-]\d+%\z/.freeze
  PITCH_PATTERN = /\A[+-]\d+Hz\z/.freeze
  SHORT_VOICE_PATTERN = /\A([a-z]{2,})-([A-Z]{2,})-(.+Neural)\z/.freeze
  BOUNDARIES = %w[WordBoundary SentenceBoundary].freeze
  private_constant :PERCENT_PATTERN, :PITCH_PATTERN, :SHORT_VOICE_PATTERN, :BOUNDARIES

  attr_reader :voice, :rate, :volume, :pitch, :boundary

  # @param voice [String] short name (e.g. "en-US-EmmaMultilingualNeural") or
  #   any other string, which is stored unchanged
  # @param rate [String] signed percentage, e.g. "+0%"
  # @param volume [String] signed percentage, e.g. "-10%"
  # @param pitch [String] signed hertz offset, e.g. "+5Hz"
  # @param boundary [String] "WordBoundary" or "SentenceBoundary"
  # @raise [TypeError] if any argument is not a String
  # @raise [ArgumentError] if rate, volume, pitch or boundary is malformed
  def initialize(voice, rate, volume, pitch, boundary)
    validate_voice!(voice)
    @voice = normalize_voice(voice)
    @rate = validate_string_param('rate', rate, PERCENT_PATTERN)
    @volume = validate_string_param('volume', volume, PERCENT_PATTERN)
    @pitch = validate_string_param('pitch', pitch, PITCH_PATTERN)
    @boundary = validate_boundary(boundary)
  end

  private

  def validate_voice!(voice)
    raise TypeError, 'voice must be a string' unless voice.is_a?(String)
  end

  # Expands a short voice name into the long
  # "Microsoft Server Speech Text to Speech Voice (locale, Name)" form.
  # Strings that are not in short form are returned as given.
  #
  # NOTE(review): a voice that does not match the short form is passed
  # through unvalidated; confirm that callers only supply trusted values.
  def normalize_voice(voice)
    match = SHORT_VOICE_PATTERN.match(voice)
    return voice unless match

    lang, region, name = match.captures

    # A hyphen inside the name means the locale has a third component, e.g.
    # "zh-CN-liaoning-XiaobeiNeural" -> locale "zh-CN-liaoning", name
    # "XiaobeiNeural". Only the first hyphen is used as the separator.
    if name.include?('-')
      extra, name = name.split('-', 2)
      region = "#{region}-#{extra}"
    end

    "Microsoft Server Speech Text to Speech Voice (#{lang}-#{region}, #{name})"
  end

  def validate_string_param(param_name, param_value, pattern)
    raise TypeError, "#{param_name} must be a string" unless param_value.is_a?(String)
    raise ArgumentError, "Invalid #{param_name} '#{param_value}'" unless param_value.match?(pattern)

    param_value
  end

  def validate_boundary(boundary)
    raise TypeError, 'boundary must be a string' unless boundary.is_a?(String)
    raise ArgumentError, "Invalid boundary '#{boundary}'" unless BOUNDARIES.include?(boundary)

    boundary
  end
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'
|
|
4
|
+
|
|
5
|
+
module RbEdgeTTS
|
|
6
|
+
# Stateless helpers for building and parsing the text messages exchanged
# with the speech service.
module Util
  # Control characters other than tab, line feed and carriage return.
  INCOMPATIBLE_CHARACTERS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/.freeze
  private_constant :INCOMPATIBLE_CHARACTERS

  class << self
    # Splits a received message into its header fields and payload.
    #
    # @param data [String] raw message
    # @param header_length [Integer] length of the header section; the payload
    #   starts two positions later (after the separating CRLF)
    # @raise [TypeError] if +data+ is not a String
    # @return [Array(Hash{String => String}, String)] headers and payload
    def get_headers_and_data(data, header_length)
      raise TypeError, 'data must be a string' unless data.is_a?(String)

      headers = data[0...header_length].split("\r\n").each_with_object({}) do |line, fields|
        key, value = line.split(':', 2)
        fields[key.strip] = value.strip if key && value
      end

      [headers, data[(header_length + 2)..]]
    end

    # Returns +string+ as UTF-8 with every control character other than tab,
    # line feed and carriage return replaced by a space. Non-string values
    # are converted with #to_s.
    #
    # NOTE(review): presumably the service cannot handle these characters;
    # confirm against the protocol.
    #
    # @param string [String, Object]
    # @return [String]
    def remove_incompatible_characters(string)
      return string.to_s unless string.is_a?(String)

      string.encode('utf-8').gsub(INCOMPATIBLE_CHARACTERS, ' ')
    end

    # @return [String] a random UUID without hyphens (32 hex characters)
    def connect_id
      SecureRandom.uuid.delete('-')
    end

    # Lazily splits +text+ into stripped chunks of at most +byte_length+
    # bytes of UTF-8.
    #
    # Each cut is made at the last newline within the limit, else the last
    # space, else the last character boundary. A cut never lands inside an
    # XML entity ("&...;") or a multi-byte character. All offsets are byte
    # offsets into a binary copy of the text, so the limit is honoured for
    # non-ASCII input too.
    #
    # @param text [String]
    # @param byte_length [Integer] maximum chunk size in bytes, greater than 0
    # @raise [TypeError] if +text+ is not a String
    # @raise [ArgumentError] if +byte_length+ is not positive, or (raised
    #   lazily while enumerating) if it is smaller than a single character
    # @return [Enumerator<String>] UTF-8 chunks; may be enumerated repeatedly
    def split_text_by_byte_length(text, byte_length)
      raise TypeError, 'text must be a string' unless text.is_a?(String)
      raise ArgumentError, 'byte_length must be greater than 0' if byte_length <= 0

      encoded = text.encode('utf-8').b
      too_small = 'Maximum byte length is too small or invalid text structure'

      Enumerator.new do |yielder|
        # Local to the block so that every enumeration starts from the full text.
        remaining = encoded

        while remaining.bytesize > byte_length
          split_at = find_last_newline_or_space_within_limit(remaining, byte_length)
          if split_at.nil?
            split_at = find_safe_utf8_split_point(remaining, byte_length)
            # Zero here means not even one whole character fits in the limit.
            raise ArgumentError, too_small if split_at.zero?
          end
          split_at = adjust_split_point_for_xml_entity(remaining, split_at)
          raise ArgumentError, too_small if split_at.negative?

          chunk = remaining.byteslice(0, split_at).force_encoding(Encoding::UTF_8).strip
          yielder << chunk unless chunk.empty?

          # Always consume at least one byte so the loop makes progress.
          remaining = remaining.byteslice((split_at.positive? ? split_at : 1)..)
        end

        tail = remaining.dup.force_encoding(Encoding::UTF_8).strip
        yielder << tail unless tail.empty?
      end
    end

    # Wraps already-escaped text in the SSML document sent for synthesis.
    #
    # @param tts_config [#voice, #pitch, #rate, #volume]
    # @param escaped_text [String] XML-escaped text
    # @return [String]
    def mkssml(tts_config, escaped_text)
      escaped_text = escaped_text.encode('utf-8') if escaped_text.is_a?(String)

      "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>" \
        "<voice name='#{tts_config.voice}'>" \
        "<prosody pitch='#{tts_config.pitch}' rate='#{tts_config.rate}' volume='#{tts_config.volume}'>" \
        "#{escaped_text}" \
        '</prosody>' \
        '</voice>' \
        '</speak>'
    end

    # @return [String] the current UTC time in the format used for X-Timestamp
    def date_to_string
      Time.now.utc.strftime('%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time)')
    end

    # Builds the message that carries an SSML document: four header lines, a
    # blank line, then the SSML followed by a single "\n".
    #
    # @param request_id [String]
    # @param timestamp [String] a "Z" is appended to it
    # @param ssml [String]
    # @return [String]
    def ssml_headers_plus_data(request_id, timestamp, ssml)
      "X-RequestId:#{request_id}\r\n" \
        "Content-Type:application/ssml+xml\r\n" \
        "X-Timestamp:#{timestamp}Z\r\n" \
        "Path:ssml\r\n" \
        "\r\n" \
        "#{ssml}\n"
    end

    def escape_xml(text)
      CGI.escapeHTML(text)
    end

    def unescape_xml(text)
      CGI.unescapeHTML(text)
    end

    private

    # Byte offset of the last "\n" (or, failing that, the last space) located
    # before +limit+, or nil when there is neither. +text+ must be binary.
    def find_last_newline_or_space_within_limit(text, limit)
      last_searchable = [limit - 1, 0].max
      text.rindex("\n", last_searchable) || text.rindex(' ', last_searchable)
    end

    # Largest byte offset not exceeding +byte_length+ that falls on a UTF-8
    # character boundary, i.e. is not followed by a continuation byte
    # (bit pattern 10xxxxxx).
    def find_safe_utf8_split_point(text_segment, byte_length = text_segment.bytesize)
      split_at = [byte_length, text_segment.bytesize].min
      while split_at.positive? && split_at < text_segment.bytesize &&
            (text_segment.getbyte(split_at) & 0xC0) == 0x80
        split_at -= 1
      end
      split_at
    end

    # Moves +split_at+ back to the start of an entity if the cut would fall
    # between its "&" and ";". +text+ must be binary.
    def adjust_split_point_for_xml_entity(text, split_at)
      while split_at.positive?
        ampersand_index = text.rindex('&', split_at - 1)
        break if ampersand_index.nil?

        semicolon_index = text.index(';', ampersand_index)
        break if semicolon_index && semicolon_index < split_at

        split_at = ampersand_index
      end
      split_at
    end
  end
end
|
|
124
|
+
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module RbEdgeTTS
|
|
6
|
+
# Holds a voice catalogue and filters it by attribute.
#
# Build instances with VoicesManager.create; #find raises on an instance
# that was not populated that way.
class VoicesManager
  attr_accessor :voices
  attr_reader :called_create

  def initialize
    @voices = []
    @called_create = false
  end

  # Builds a populated manager.
  #
  # @param custom_voices [Enumerable, nil] voice objects responding to name,
  #   short_name, gender, locale, suggested_codec, friendly_name, status and
  #   voice_tag; when nil the list is fetched with RbEdgeTTS.list_voices_helper
  # @return [VoicesManager]
  def self.create(custom_voices = nil)
    voices_data = custom_voices.nil? ? RbEdgeTTS.list_voices_helper : custom_voices
    # The populated flag is set through a private method so that it stays
    # read-only for callers.
    new.tap { |manager| manager.send(:load_voices, voices_data) }
  end

  # Returns the voices whose attributes equal every given value, e.g.
  # find(gender: 'Female', language: 'en').
  #
  # Any attribute the stored voice objects respond to can be used. An
  # attribute they do not respond to matches nothing.
  #
  # @param kwargs [Hash{Symbol => Object}] attribute name => required value
  # @raise [RuntimeError] if the manager was not built with VoicesManager.create
  # @return [Array<VoicesManagerVoice>]
  def find(**kwargs)
    raise 'VoicesManager.find() called before VoicesManager.create()' unless @called_create

    voices.select do |voice|
      kwargs.all? do |attribute, expected|
        voice.respond_to?(attribute) && voice.public_send(attribute) == expected
      end
    end
  end

  # Attribute readers kept for callers that used them directly.
  def gender(voice)
    voice.gender
  end

  def locale(voice)
    voice.locale
  end

  def language(voice)
    voice.language
  end

  private

  # Copies each voice into a VoicesManagerVoice, adding the language (the
  # part of the locale before the first hyphen), and marks the manager as
  # populated.
  def load_voices(voices_data)
    @voices = voices_data.map do |voice|
      VoicesManagerVoice.new(
        name: voice.name,
        short_name: voice.short_name,
        gender: voice.gender,
        locale: voice.locale,
        suggested_codec: voice.suggested_codec,
        friendly_name: voice.friendly_name,
        status: voice.status,
        voice_tag: voice.voice_tag,
        language: voice.locale.split('-').first
      )
    end
    @called_create = true
  end
end
|
|
63
|
+
|
|
64
|
+
# Public entry point for fetching the voice catalogue; forwards both options
# unchanged to list_voices_helper and returns its result.
def self.list_voices(connector: nil, proxy: nil)
  options = { connector: connector, proxy: proxy }
  list_voices_helper(**options)
end
|
|
67
|
+
|
|
68
|
+
# Downloads the voice catalogue and converts it into Voice structs.
#
# NOTE(review): +connector+ and +proxy+ are accepted but not used by this
# implementation; confirm whether proxy support is expected.
#
# @raise [UnexpectedResponse] if the server does not answer with a 2xx status
# @return [Array<Voice>]
def self.list_voices_helper(connector: nil, proxy: nil)
  require 'net/http'
  require 'uri'

  uri = URI("#{VOICE_LIST}&Sec-MS-GEC=#{DRM.generate_sec_ms_gec}&Sec-MS-GEC-Version=#{SEC_MS_GEC_VERSION}")
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER

  request = Net::HTTP::Get.new(uri)
  DRM.headers_with_muid(VOICE_HEADERS).each do |header, value|
    # Setting Accept-Encoding by hand switches off Net::HTTP's transparent
    # decompression, and the body would then reach JSON.parse still
    # compressed. Leave that header to Net::HTTP.
    next if header.casecmp?('Accept-Encoding')

    request[header] = value
  end

  response = http.request(request)
  raise UnexpectedResponse, "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

  # "ShortName" -> :short_name
  snake_case = ->(key) { key.to_s.gsub(/([a-z])([A-Z])/, '\1_\2').downcase.to_sym }

  JSON.parse(response.body).map do |voice_data|
    attributes = voice_data.transform_keys(&snake_case)

    # Normalise the nested tag keys as well, so the lists sent by the server
    # are kept rather than replaced with empty defaults.
    tag = (attributes[:voice_tag] || {}).transform_keys(&snake_case)
    attributes[:voice_tag] = VoiceTag.new(
      content_categories: tag[:content_categories] || [],
      voice_personalities: tag[:voice_personalities] || []
    )

    # Ignore fields the Voice struct does not declare instead of raising.
    Voice.new(**attributes.slice(*Voice.members))
  end
end
|
|
100
|
+
end
|
data/lib/rb_edge_tts.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'eventmachine'
|
|
4
|
+
require 'faye/websocket'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'openssl'
|
|
7
|
+
require 'securerandom'
|
|
8
|
+
require 'time'
|
|
9
|
+
|
|
10
|
+
require_relative 'rb_edge_tts/typing'
|
|
11
|
+
require_relative 'rb_edge_tts/constants'
|
|
12
|
+
require_relative 'rb_edge_tts/drm'
|
|
13
|
+
require_relative 'rb_edge_tts/util'
|
|
14
|
+
require_relative 'rb_edge_tts/srt_composer'
|
|
15
|
+
require_relative 'rb_edge_tts/submaker'
|
|
16
|
+
require_relative 'rb_edge_tts/voices_manager'
|
|
17
|
+
require_relative 'rb_edge_tts/exceptions'
|
|
18
|
+
require_relative 'rb_edge_tts/communicate'
|
|
19
|
+
require_relative 'rb_edge_tts/version'
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require 'async'
|
|
5
|
+
require 'async/http'
|
|
6
|
+
require 'async/io/stream'
|
|
7
|
+
require 'json'
|
|
8
|
+
require 'openssl'
|
|
9
|
+
require 'time'
|
|
10
|
+
|
|
11
|
+
# Version constants plus a smoke test that runs whenever this file is loaded.
#
# NOTE(review): the version string here differs from the package version
# (1.0.0) shown in the release metadata; confirm which one is intended.
# NOTE(review): the block below prints to standard output at load time and
# refers to DRM, which this file does not load itself. If DRM is undefined
# the NameError is rescued and reported as a load error.
module RbEdgeTTS
  VERSION = '7.2.7'
  VERSION_INFO = VERSION.split('.').map { |component| component.to_i }.freeze

  # Test basic functionality
  begin
    puts 'Module loaded successfully', "Version: #{VERSION}"
    puts "Sec-MS-GEC: #{DRM.generate_sec_ms_gec}"
    puts "MUID: #{DRM.generate_muid}"
  rescue StandardError => e
    puts "Error loading module: #{e.message}"
  end
end
|
data/rb-edge-tts.gemspec
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/rb_edge_tts/version'
|
|
4
|
+
|
|
5
|
+
# Packaging metadata for the rb-edge-tts gem.
#
# NOTE(review): the repository URLs contain "yourusername", which looks like
# an unfilled template placeholder; confirm the real location.
Gem::Specification.new do |spec|
  repository_url = 'https://github.com/yourusername/rb-edge-tts'

  spec.name = 'rb-edge-tts'
  spec.version = RbEdgeTTS::VERSION
  spec.authors = ['Peng Zhang']
  spec.email = ['zpregister@gmail.com']

  spec.summary = "Ruby gem for Microsoft Edge's online text-to-speech service"
  spec.description = "A Ruby library and CLI tool to use Microsoft Edge's online TTS service from within " \
                     'Ruby code or using the provided rb-edge-tts or rb-edge-playback commands.'
  spec.homepage = repository_url
  spec.license = 'LGPL-3.0-or-later'
  spec.required_ruby_version = '>= 3.0.0'

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = repository_url
  spec.metadata['changelog_uri'] = "#{repository_url}/blob/main/CHANGELOG.md"

  # Everything under lib, exe and examples, plus the top-level project files.
  top_level_files = %w[LICENSE README.md Rakefile rb-edge-tts.gemspec Gemfile]
  spec.files = Dir.glob('{lib,exe,examples}/**/*') + top_level_files
  spec.bindir = 'exe'
  spec.executables = %w[rb-edge-tts rb-edge-playback]
  spec.require_paths = ['lib']

  {
    'eventmachine' => '~> 1.2',
    'faye-websocket' => '~> 0.11',
    'json' => '~> 2.6',
    'terminal-table' => '~> 3.0'
  }.each { |gem_name, requirement| spec.add_dependency gem_name, requirement }

  {
    'rake' => '~> 13.0',
    'rspec' => '~> 3.12',
    'rubocop' => '~> 1.50',
    'rubocop-rspec' => '~> 2.20',
    'simplecov' => '~> 0.22'
  }.each { |gem_name, requirement| spec.add_development_dependency gem_name, requirement }
end
|