rb-edge-tts 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative '../lib/rb_edge_tts'
5
+ require_relative '../lib/edge_playback/version'
6
+ require 'optparse'
7
+ require 'tempfile'
8
+ require 'open3'
9
+
10
+ module EdgePlayback
11
+ module CLI
12
+ class << self
13
# Entry point of the playback CLI: synthesizes speech with rb-edge-tts into
# (temporary) files and then plays the result.
#
# args - command-line arguments; playback-only flags are removed by
#        parse_options and the remainder is forwarded to rb-edge-tts.
#
# Environment variables read here: EDGE_PLAYBACK_DEBUG,
# EDGE_PLAYBACK_KEEP_TEMP, EDGE_PLAYBACK_MP3_FILE, EDGE_PLAYBACK_SRT_FILE.
def run(args)
  use_mpv = parse_options(args)[:use_mpv]
  check_dependencies(use_mpv)

  verbose, keep_files, media_path, subtitle_path = ENV.values_at(
    'EDGE_PLAYBACK_DEBUG',
    'EDGE_PLAYBACK_KEEP_TEMP',
    'EDGE_PLAYBACK_MP3_FILE',
    'EDGE_PLAYBACK_SRT_FILE'
  )

  begin
    media_path, subtitle_path = create_temp_files(use_mpv, media_path, subtitle_path, verbose)
    run_edge_tts(media_path, subtitle_path, args)
    play_media(use_mpv, media_path, subtitle_path)
  ensure
    # Runs even when synthesis or playback fails, so files are not left behind.
    cleanup(media_path, subtitle_path, keep_files)
  end
end
30
+
31
+ private
32
+
33
# Extracts the playback-specific flags from +args+.
#
# +args+ is modified in place: recognised flags are removed and every other
# argument is kept (in order) so it can be forwarded to rb-edge-tts.
# --version and --help print their output and exit with status 0.
#
# Returns a Hash with the key :use_mpv (defaults to true except on Windows).
def parse_options(args)
  options = { use_mpv: !Gem.win_platform? }
  passthrough = []

  while (token = args.shift)
    case token
    when '--mpv' then options[:use_mpv] = true
    when '--version'
      puts "rb-edge-playback #{EdgePlayback::VERSION}"
      exit 0
    when '-h', '--help'
      puts(
        "Usage: rb-edge-playback [options] [rb-edge-tts options]",
        "",
        "Options:",
        " --mpv Use mpv to play audio",
        " --version Show version",
        " -h, --help Show this help message",
        "",
        "See 'rb-edge-tts --help' for additional arguments"
      )
      exit 0
    else
      passthrough.push(token)
    end
  end

  # Hand the untouched arguments back to the caller through the same array.
  args.concat(passthrough)

  options
end
64
+
65
# Verifies that the external programs needed for playback are installed.
#
# use_mpv - when truthy, mpv is required in addition to rb-edge-tts.
#
# Returns nil when everything is available; otherwise reports each missing
# program on stderr and exits with status 1.
def check_dependencies(use_mpv)
  required = ['rb-edge-tts']
  required << 'mpv' if use_mpv

  missing = required.reject { |command| command_available?(command) }
  return if missing.empty?

  missing.each { |dep| warn "#{dep} is not installed." }
  warn 'Please install the missing dependencies.'
  exit 1
end

# Returns true when +command+ resolves to an executable file in one of the
# PATH directories. Implemented in Ruby rather than shelling out to `which`
# (and redirecting to /dev/null), neither of which exists on Windows.
def command_available?(command)
  # On Windows executables carry one of the PATHEXT suffixes (e.g. the .bat
  # wrappers RubyGems generates); the bare name is always tried as well.
  suffixes = ['']
  suffixes.concat(ENV.fetch('PATHEXT', '.COM;.EXE;.BAT;.CMD').split(';')) if Gem.win_platform?

  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).any? do |dir|
    next false if dir.empty?

    suffixes.any? do |suffix|
      candidate = File.join(dir, "#{command}#{suffix}")
      File.file?(candidate) && File.executable?(candidate)
    end
  end
end
77
+
78
# Chooses the media and subtitle file paths, creating temporary files for
# whichever path was not supplied by the caller.
#
# use_mpv   - a subtitle file is only created when mpv will be used.
# mp3_fname - existing media path or nil.
# srt_fname - existing subtitle path or nil.
# debug     - when truthy, the generated paths are printed.
#
# Returns [mp3_fname, srt_fname]; srt_fname stays nil when mpv is not used
# and no path was given.
def create_temp_files(use_mpv, mp3_fname, srt_fname, debug)
  unless mp3_fname
    mp3_fname = reserve_temp_path('.mp3')
    puts "Media file: #{mp3_fname}" if debug
  end

  if use_mpv && !srt_fname
    srt_fname = reserve_temp_path('.srt')
    puts "Subtitle file: #{srt_fname}" if debug
  end

  [mp3_fname, srt_fname]
end

# Creates an empty temporary file with the given extension and returns its
# path. Tempfile.create is used instead of Tempfile.new because a Tempfile
# object unlinks its file from a finalizer once it is garbage collected,
# which could remove the file while it is still being written or played.
# The caller is responsible for deleting the file.
def reserve_temp_path(extension)
  file = Tempfile.create(['rb-edge-playback-', extension])
  file.close
  file.path
end
95
+
96
# Runs the rb-edge-tts executable so that it writes the audio (and, when a
# subtitle path is given, the subtitles) to the supplied files.
#
# tts_args - extra arguments forwarded verbatim to rb-edge-tts.
#
# Raises RuntimeError when the command does not succeed.
def run_edge_tts(mp3_fname, srt_fname, tts_args)
  subtitle_flag = srt_fname ? ["--write-subtitles=#{srt_fname}"] : []
  command = ['rb-edge-tts', "--write-media=#{mp3_fname}", *subtitle_flag, *tts_args]

  # Argument-list form of system: no shell is involved.
  return if system(*command)

  raise "rb-edge-tts failed with status #{Process.last_status.exitstatus}"
end
104
+
105
# Plays the generated audio.
#
# On Windows without mpv the built-in player (play_mp3_win32) is used; in
# every other case mpv is launched, with the subtitle file attached when one
# is available. A failing mpv run is reported on stderr but does not raise.
def play_media(use_mpv, mp3_fname, srt_fname)
  native_windows = Gem.win_platform? && !use_mpv

  if native_windows
    play_mp3_win32(mp3_fname)
  else
    subtitle_flag = srt_fname ? ["--sub-file=#{srt_fname}"] : []
    succeeded = system('mpv', '--msg-level=all=error,statusline=status', *subtitle_flag, mp3_fname)
    warn "mpv failed with status #{Process.last_status.exitstatus}" unless succeeded
  end

  nil
end
118
+
119
# Plays an MP3 file on Windows through the MCI (winmm) string interface.
#
# mp3_fname - path of the file to play.
#
# The wide-character (W) Win32 entry points are used, so every string handed
# to them is converted to NUL-terminated UTF-16LE and the result buffer is
# decoded from UTF-16LE. Exits with status 1 when the Windows libraries
# cannot be loaded.
def play_mp3_win32(mp3_fname)
  require 'fiddle'

  kernel32 = Fiddle::Handle.new('kernel32')
  winmm = Fiddle::Handle.new('winmm')

  # DWORD is a 32-bit unsigned integer; Fiddle spells "unsigned" by negating
  # the signed type constant.
  dword = -Fiddle::TYPE_INT

  get_short_path_name_w = Fiddle::Function.new(
    kernel32['GetShortPathNameW'],
    [Fiddle::TYPE_VOIDP, Fiddle::TYPE_VOIDP, dword],
    dword
  )

  mci_send_string_w = Fiddle::Function.new(
    winmm['mciSendStringW'],
    [Fiddle::TYPE_VOIDP, Fiddle::TYPE_VOIDP, dword, Fiddle::TYPE_VOIDP],
    dword
  )

  # Converts a Ruby string into a NUL-terminated UTF-16LE string.
  wide = ->(text) { "#{text}\0".encode('UTF-16LE') }

  max_chars = 260
  buffer = "\0".b * (max_chars * 2)

  # The 8.3 short path avoids quoting/Unicode issues inside the MCI command.
  # The call returns the number of characters written (0 on failure, or the
  # required size when the buffer is too small).
  written = get_short_path_name_w.call(wide.call(mp3_fname), buffer, max_chars)
  short_name =
    if written.positive? && written < max_chars
      buffer.byteslice(0, written * 2).force_encoding('UTF-16LE').encode('UTF-8')
    else
      mp3_fname
    end

  send_mci = ->(command) { mci_send_string_w.call(wide.call(command), nil, 0, nil) }

  send_mci.call('Close All')
  open_status = send_mci.call("Open \"#{short_name}\" Type MPEGVideo Alias theMP3")
  unless open_status.zero?
    warn "Unable to open #{mp3_fname} for playback (MCI error #{open_status})"
    return
  end

  send_mci.call('Play theMP3 Wait')
  send_mci.call('Close theMP3')
rescue LoadError => e
  warn "Error loading Windows libraries: #{e.message}"
  exit 1
rescue Fiddle::DLError => e
  warn "Error loading Windows libraries: #{e.message}"
  exit 1
end
157
+
158
# Deletes the media and subtitle files unless +keep+ is truthy.
# nil paths and paths that no longer exist are ignored.
def cleanup(mp3_fname, srt_fname, keep)
  return if keep

  [mp3_fname, srt_fname].each do |path|
    File.delete(path) if path && File.exist?(path)
  end
end
164
+ end
165
+ end
166
+ end
167
+
168
+ EdgePlayback::CLI.run(ARGV)
data/exe/rb-edge-tts ADDED
@@ -0,0 +1,192 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative '../lib/rb_edge_tts'
5
+ require 'optparse'
6
+ require 'fileutils'
7
+
8
+ module RbEdgeTTS
9
+ module CLI
10
+ class << self
11
# Entry point of the rb-edge-tts CLI.
#
# Parses +args+, then either prints the voice list and exits with status 0
# (--list-voices) or synthesizes the requested text.
def run(args)
  settings = parse_options(args)

  unless settings[:list_voices]
    return run_tts(get_text(settings), settings)
  end

  print_voices(settings)
  exit 0
end
22
+
23
+ private
24
+
25
# Parses the rb-edge-tts command line.
#
# args - argument array; recognised options are removed from it (parse!).
#
# Returns a Hash of options (voice, rate, volume and pitch always present).
# Prints an error to stderr and exits with status 1 on invalid options or
# when none of --text, --file or --list-voices is given. --version and
# --help print and exit with status 0.
def parse_options(args)
  default_voice = RbEdgeTTS::DEFAULT_VOICE
  options = { voice: default_voice, rate: '+0%', volume: '+0%', pitch: '+0Hz' }

  parser = OptionParser.new
  parser.banner = "Usage: rb-edge-tts [options]"
  parser.separator ""
  parser.separator "Options:"

  parser.on('-t', '--text TEXT', 'What TTS will say') { |value| options[:text] = value }
  parser.on('-f', '--file FILE', 'Read text from file') { |value| options[:file] = value }
  parser.on('-v', '--voice VOICE', "Voice for TTS (default: #{default_voice})") { |value| options[:voice] = value }
  parser.on('-l', '--list-voices', 'List available voices and exit') { options[:list_voices] = true }
  parser.on('--rate RATE', 'Set TTS rate (e.g., +20%, -50%)') { |value| options[:rate] = value }
  parser.on('--volume VOLUME', 'Set TTS volume (e.g., +10%, -20%)') { |value| options[:volume] = value }
  parser.on('--pitch PITCH', 'Set TTS pitch (e.g., +5Hz, -10Hz)') { |value| options[:pitch] = value }
  parser.on('--write-media FILE', 'Write media output to file instead of stdout') do |value|
    options[:write_media] = value
  end
  parser.on('--write-subtitles FILE', 'Write subtitle output to file instead of stderr') do |value|
    options[:write_subtitles] = value
  end
  parser.on('--proxy URL', 'Use a proxy for TTS and voice list') { |value| options[:proxy] = value }
  parser.on('--verbose', 'Show debug information') { options[:verbose] = true }
  parser.on('--version', 'Show version') do
    puts "rb-edge-tts #{VERSION}"
    exit 0
  end
  parser.on('-h', '--help', 'Show this help message') do
    puts parser
    exit 0
  end

  parser.parse!(args)

  # Some input source (or the voice listing) is mandatory.
  has_job = options[:text] || options[:file] || options[:list_voices]
  raise OptionParser::MissingArgument, 'Must specify --text or --file' unless has_job

  options
rescue OptionParser::ParseError => e
  warn "Error: #{e.message}"
  warn "Use --help for usage information"
  exit 1
end
101
+
102
# Resolves the text to synthesize from the parsed options.
#
# When options[:file] is set the text is read from that file as UTF-8
# ('-' or '/dev/stdin' means standard input); otherwise options[:text]
# is returned.
def get_text(options)
  source = options[:file]
  return options[:text] unless source
  return STDIN.read if ['-', '/dev/stdin'].include?(source)

  File.read(source, encoding: 'utf-8')
end
113
+
114
# Prints a table of the available voices, sorted by short name.
#
# options - parsed CLI options; only :proxy is used (passed to
#           RbEdgeTTS.list_voices).
def print_voices(options)
  voices = RbEdgeTTS.list_voices(proxy: options[:proxy]).sort_by(&:short_name)

  # Loaded lazily: only the voice listing needs the table renderer.
  require 'terminal-table'

  rows = voices.map do |voice|
    tag = voice.voice_tag
    [
      voice.short_name,
      voice.gender,
      tag.content_categories.join(', '),
      tag.voice_personalities.join(', ')
    ]
  end

  puts Terminal::Table.new(
    headings: %w[Name Gender ContentCategories VoicePersonalities],
    rows: rows
  )
end
133
+
134
# Synthesizes +text+ and writes the audio (and optionally SRT subtitles).
#
# text    - the text to speak.
# options - parsed CLI options (:voice, :rate, :volume, :pitch, :proxy,
#           :verbose, :write_media, :write_subtitles).
#
# Audio goes to options[:write_media], or to stdout when that is absent or
# '-'. Subtitles are only produced when options[:write_subtitles] is set
# ('-' means stderr). Ctrl+C prints a message and exits with status 1.
def run_tts(text, options)
  # Binary audio on an interactive terminal is rarely intended; ask first.
  if STDIN.tty? && STDOUT.tty? && !options[:write_media]
    warn 'Warning: TTS output will be written to the terminal.'
    warn 'Use --write-media to write to a file.'
    warn 'Press Ctrl+C to cancel the operation.'
    warn 'Press Enter to continue.'
    STDIN.gets
  end

  communicate = RbEdgeTTS::Communicate.new(
    text,
    options[:voice],
    rate: options[:rate],
    volume: options[:volume],
    pitch: options[:pitch],
    proxy: options[:proxy],
    verbose: options[:verbose]
  )

  submaker = RbEdgeTTS::SubMaker.new
  audio_file = nil
  sub_file = nil

  begin
    # Both outputs are opened inside the begin block so that the audio file
    # is still closed if opening the subtitle file fails.
    audio_file = open_audio_output(options[:write_media])
    sub_file = open_subtitle_output(options[:write_subtitles])

    communicate.stream do |chunk|
      if chunk.type == 'audio'
        audio_file.write(chunk.data)
      elsif %w[WordBoundary SentenceBoundary].include?(chunk.type) && sub_file
        submaker.feed(chunk)
      end
    end

    sub_file&.puts(submaker.get_srt)
  ensure
    # Never close the process-wide standard streams.
    audio_file.close if audio_file && !audio_file.equal?(STDOUT)
    sub_file.close if sub_file && !sub_file.equal?(STDERR)
  end
rescue Interrupt
  warn "\nOperation canceled."
  exit 1
end

# Returns the IO that receives audio data: a file opened for binary writing,
# or stdout (switched to binary mode) when +path+ is nil or '-'.
def open_audio_output(path)
  return File.open(path, 'wb') if path && path != '-'

  STDOUT.binmode
  STDOUT
end

# Returns the IO that receives subtitles: nil when +path+ is nil, stderr for
# '-', otherwise a UTF-8 text file opened for writing.
def open_subtitle_output(path)
  return nil unless path

  path == '-' ? STDERR : File.open(path, 'w', encoding: 'utf-8')
end
188
+ end
189
+ end
190
+ end
191
+
192
+ RbEdgeTTS::CLI.run(ARGV)
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Version of the rb-edge-playback command. It is defined as the very same
# value as RbEdgeTTS::VERSION, so RbEdgeTTS::VERSION must already be defined
# when this file is evaluated (nothing is required here).
+ module EdgePlayback
4
+ VERSION = RbEdgeTTS::VERSION
5
+ end
@@ -0,0 +1,336 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eventmachine'
4
+ require 'faye/websocket'
5
+ require 'json'
6
+ require 'openssl'
7
+ require 'time'
8
+ require 'securerandom'
9
+
10
+ require_relative 'typing'
11
+ require_relative 'constants'
12
+ require_relative 'drm'
13
+ require_relative 'util'
14
+ require_relative 'srt_composer'
15
+ require_relative 'submaker'
16
+ require_relative 'voices_manager'
17
+
18
+ module RbEdgeTTS
19
+ class Communicate
20
+ attr_accessor :texts, :proxy, :state, :tts_config
21
+
22
# Prepares a text-to-speech request.
#
# text            - String to synthesize (TypeError otherwise).
# voice           - voice name handed to TTSConfig.
# rate/volume/pitch/boundary - handed to TTSConfig unchanged.
# proxy           - optional proxy URL String (TypeError otherwise).
# connect_timeout - Integer (TypeError otherwise).
# receive_timeout - Integer (TypeError otherwise).
# verbose         - enables log output.
#
# The text is cleaned, XML-escaped and split into parts of at most 4096
# bytes via the Util helpers; each part is streamed separately.
def initialize(text,
               voice = RbEdgeTTS::DEFAULT_VOICE,
               rate: '+0%',
               volume: '+0%',
               pitch: '+0Hz',
               boundary: 'SentenceBoundary',
               proxy: nil,
               connect_timeout: 10,
               receive_timeout: 60,
               verbose: false)
  raise TypeError, 'text must be a string' unless text.is_a?(String)

  @tts_config = TTSConfig.new(voice, rate, volume, pitch, boundary)

  cleaned = Util.remove_incompatible_characters(text)
  escaped = Util.escape_xml(cleaned)
  @texts = Util.split_text_by_byte_length(escaped, 4096).to_a

  @proxy = proxy
  raise TypeError, 'proxy must be a string' unless !proxy || proxy.is_a?(String)

  { 'connect_timeout' => connect_timeout, 'receive_timeout' => receive_timeout }.each do |label, value|
    raise TypeError, "#{label} must be an integer" unless value.is_a?(Integer)
  end

  @connect_timeout = connect_timeout
  @receive_timeout = receive_timeout
  @verbose = verbose

  @state = CommunicateState.new(
    partial_text: '',
    offset_compensation: 0,
    last_duration_offset: 0,
    stream_was_called: false
  )
end
55
+
56
# Streams every text part to the service, yielding each resulting chunk to
# the given block.
#
# Raises RuntimeError when called a second time on the same instance.
def stream(&block)
  already_started = @state.stream_was_called
  raise 'stream can only be called once.' if already_started

  @state.stream_was_called = true

  @texts.each do |segment|
    # stream_internal reads the current part from the shared state.
    @state.partial_text = segment
    stream_internal(&block)
  end
end
66
+
67
# Runs #stream on a background thread and exposes the chunks as an
# Enumerator.
#
# The producer thread starts immediately; chunks are handed over through a
# queue and a nil entry marks the end of the stream. An error raised by
# #stream is re-raised in the consumer when the enumeration finishes.
#
# Returns an Enumerator of chunks.
def stream_sync
  queue = Thread::Queue.new

  producer = Thread.new do
    # The failure is re-raised to the consumer via #join below, so the
    # default per-thread error report would only duplicate it.
    Thread.current.report_on_exception = false
    begin
      stream { |chunk| queue.push(chunk) }
    ensure
      # Always signal the end of the stream; without this a failing #stream
      # would leave the consumer blocked on queue.pop forever.
      queue.push(nil)
    end
  end

  Enumerator.new do |yielder|
    begin
      while (chunk = queue.pop)
        yielder << chunk
      end
    ensure
      # Waits for the producer and propagates its exception, if any.
      producer.join
    end
  end
end
88
+
89
# Streams the speech and writes it to disk.
#
# audio_fname    - String path; receives the audio data (binary).
# metadata_fname - optional String path; when given, every WordBoundary /
#                  SentenceBoundary chunk is written to it as one JSON line
#                  and passed to the optional block.
#
# Raises TypeError when a file name is not a String.
def save(audio_fname, metadata_fname = nil, &block)
  raise TypeError, 'audio_fname must be a string' unless audio_fname.is_a?(String)
  raise TypeError, 'metadata_fname must be a string' if metadata_fname && !metadata_fname.is_a?(String)

  metadata_io = nil

  File.open(audio_fname, 'wb') do |audio_io|
    metadata_io = File.open(metadata_fname, 'w', encoding: 'utf-8') if metadata_fname

    stream do |chunk|
      case chunk.type
      when 'audio'
        audio_io.write(chunk.data)
      when 'WordBoundary', 'SentenceBoundary'
        # Boundary chunks are only of interest when metadata is recorded.
        next unless metadata_io

        metadata_io.puts(JSON.generate(chunk.to_h))
        block&.call(chunk)
      end
    end
  end
ensure
  metadata_io&.close
end
109
+
110
# Runs #save on a separate thread and waits for it to finish.
#
# Arguments are the same as for #save. Raises TypeError when a file name is
# not a String. Returns the joined Thread.
def save_sync(audio_fname, metadata_fname = nil, &block)
  raise TypeError, 'audio_fname must be a string' unless audio_fname.is_a?(String)
  raise TypeError, 'metadata_fname must be a string' if metadata_fname && !metadata_fname.is_a?(String)

  worker = Thread.new { save(audio_fname, metadata_fname, &block) }
  worker.join
end
116
+
117
+ private
118
+
119
# Streams the current text part (@state.partial_text) over a WebSocket
# connection and yields every chunk produced by handle_message.
#
# The EventMachine reactor runs until the socket closes, reports an error or
# the receive timeout elapses. When the attempt fails with an error whose
# message mentions 403, DRM.handle_client_response_error is invoked and the
# request is retried once.
#
# NOTE(review): nothing here detects a session that ended without any audio
# being received — confirm whether that should be reported to the caller.
def stream_internal
  @ws = nil
  retried_after_403 = false

  begin
    EventMachine.run do
      url = "#{RbEdgeTTS::WSS_URL}&ConnectionId=#{Util.connect_id}&Sec-MS-GEC=#{DRM.generate_sec_ms_gec}&Sec-MS-GEC-Version=#{RbEdgeTTS::SEC_MS_GEC_VERSION}"

      options = {
        headers: DRM.headers_with_muid(RbEdgeTTS::WSS_HEADERS),
        tls: {
          verify_peer: true,
          ca_file: OpenSSL::X509::DEFAULT_CERT_FILE
        }
      }
      # Honour the proxy given to the constructor (faye-websocket expects
      # the proxy URL under the :origin key).
      options[:proxy] = { origin: @proxy } if @proxy

      @ws = Faye::WebSocket::Client.new(url, [], options)

      @ws.on :open do |_event|
        log 'WebSocket connection opened'
        send_command_request(@ws)
        send_ssml_request(@ws)
      end

      @ws.on :message do |event|
        handle_message(event.data) { |result| yield result }
      end

      @ws.on :close do |event|
        # 1006 is common after successful transmission
        log "WebSocket connection closed: #{event.code} #{event.reason}" unless event.code == 1006
        EventMachine.stop
      end

      @ws.on :error do |event|
        # Ignore ECONNRESET as it often happens at the end of stream
        log "WebSocket Error: #{event.message}" unless event.message.to_s.include?('ECONNRESET')
        EventMachine.stop
      end

      EventMachine.add_timer(@receive_timeout) do
        # Reported on stderr: stdout may be carrying the audio stream.
        warn "Timeout: No response in #{@receive_timeout} seconds"
        EventMachine.stop
      end
    end
  rescue StandardError => e
    # Retry at most once so a persistent 403 cannot loop forever.
    raise if retried_after_403 || !e.message.include?('403')

    retried_after_403 = true
    DRM.handle_client_response_error(e)
    retry
  end
end
177
+
178
# Sends the command request built by DRM.command_request for the configured
# boundary type over +ws+, logging the request beforehand.
def send_command_request(ws)
  log('Sending command request:')

  payload = DRM.command_request(@tts_config.boundary)
  log(payload)

  ws.send(payload)
end
184
+
185
# Sends the SSML request for the current text part over +ws+.
#
# The message consists of CRLF-separated headers (request id, content type,
# timestamp, path), an empty line, and the SSML document from Util.mkssml.
def send_ssml_request(ws)
  document = Util.mkssml(@tts_config, @state.partial_text)

  header_lines = [
    "X-RequestId:#{Util.connect_id}",
    'Content-Type:application/ssml+xml',
    "X-Timestamp:#{Util.date_to_string}Z",
    'Path:ssml'
  ]

  ws.send("#{header_lines.join("\r\n")}\r\n\r\n#{document}")
end
196
+
197
# Dispatches an incoming WebSocket message: String payloads go to
# handle_text_message, everything else to handle_binary_message.
def handle_message(data, &block)
  return handle_text_message(data, &block) if data.is_a?(String)

  handle_binary_message(data, &block)
end
207
+
208
# Processes a text frame: splits it into headers and body at the first blank
# line and acts on the value of the Path header.
#
# Frames without a header separator (longer than two characters), or whose
# headers contain 'Path:audio' without a parsable Path header, are handed to
# handle_binary_message as bytes.
#
# Raises UnknownResponse for an unrecognised path.
def handle_text_message(data, &block)
  return if data.nil? || data.empty?

  separator_at = data.index("\r\n\r\n")
  if separator_at.nil?
    handle_binary_message(data.bytes, &block) if data.length > 2
    return
  end

  headers = data[0...separator_at]
  payload = data[(separator_at + 4)..-1]

  path = extract_header_value(headers, 'Path')
  if path.nil?
    handle_binary_message(data.bytes, &block) if headers.include?('Path:audio')
    return
  end

  case path
  when 'audio.metadata'
    handle_metadata(payload, &block)
    update_last_duration_offset(payload)
  when 'audio'
    handle_binary_message(data.bytes, &block)
  when 'turn.end'
    # End of this text part: shift later offsets and end the session.
    update_offset_compensation
    @ws&.close
  when 'response', 'turn.start', 'path', 'SessionEnd'
    nil
  else
    raise UnknownResponse, "Unknown path received: #{path}"
  end
end
247
+
248
# Processes a binary frame and yields its audio payload as a TTSChunk.
#
# data - the frame as a sequence of byte values. The first two bytes hold
#        the big-endian length of the header section, followed by the
#        headers and then the audio body.
#
# Frames that are nil, shorter than two bytes, or carry an empty body are
# ignored.
#
# Raises UnexpectedResponse when the declared header length exceeds the
# frame, when the Path header is not 'audio', or when a Content-Type other
# than 'audio/mpeg' is present.
def handle_binary_message(data)
  return if data.nil? || data.length < 2

  header_length = (data[0] << 8) | data[1]
  header_end = 2 + header_length

  # The two length bytes count towards the frame size. Checking only
  # header_length would let a truncated frame through, and slicing past the
  # end would return nil instead of the body.
  if header_end > data.length
    raise UnexpectedResponse, 'The header length is greater than the length of the data.'
  end

  headers = data[2...header_end].pack('C*').force_encoding('utf-8')
  body = data[header_end..-1].pack('C*')

  path = extract_header_value(headers, 'Path')

  raise UnexpectedResponse, "Received binary message, but the path is not audio: #{path}" if path != 'audio'

  content_type = extract_header_value(headers, 'Content-Type')

  if content_type && content_type != 'audio/mpeg'
    raise UnexpectedResponse, "Received binary message, but with an unexpected Content-Type: #{content_type}"
  end

  return if body.empty?

  yield TTSChunk.new(type: 'audio', data: body)
end
277
+
278
# Parses an audio.metadata body and yields one TTSChunk per WordBoundary /
# SentenceBoundary entry.
#
# data - JSON text expected to look like {"Metadata": [{"Type":, "Data":}]}.
#
# Offsets are shifted by @state.offset_compensation. Invalid JSON is reported
# on stderr and otherwise ignored; entries of other types or without a Data
# object are skipped.
def handle_metadata(data)
  return if data.nil? || data.empty?

  begin
    metadata = JSON.parse(data)
  rescue JSON::ParserError => e
    # stderr, not stdout: stdout may be carrying the audio stream. The
    # rescue covers only the parse so errors from the caller's block are
    # not swallowed.
    warn "JSON parse error: #{e.message}"
    return
  end

  return unless metadata.is_a?(Hash) && metadata['Metadata'].is_a?(Array)

  metadata['Metadata'].each do |meta_obj|
    next unless meta_obj.is_a?(Hash)

    meta_type = meta_obj['Type']
    next unless %w[WordBoundary SentenceBoundary].include?(meta_type)

    data_obj = meta_obj['Data']
    next unless data_obj.is_a?(Hash)

    yield TTSChunk.new(
      type: meta_type,
      offset: (data_obj['Offset'] || 0) + @state.offset_compensation,
      duration: data_obj['Duration'] || 0,
      text: Util.unescape_xml(data_obj.dig('text', 'Text') || '')
    )
  end
end
304
+
305
# Returns the stripped value of the header named +key+ (matched
# case-insensitively at the start of a line), or nil when the header is
# absent or +headers+ is not a String.
def extract_header_value(headers, key)
  return nil unless headers.is_a?(String)

  captured = headers[/^#{Regexp.escape(key)}:([^\r\n]*)/i, 1]
  captured&.strip
end
311
+
312
# Records in @state.last_duration_offset the end position (Offset plus
# Duration) of the boundary entries found in an audio.metadata body; with
# several entries the last one wins. Invalid JSON is ignored.
def update_last_duration_offset(data)
  parsed = JSON.parse(data)
  entries = parsed.is_a?(Hash) ? parsed['Metadata'] : nil
  return unless entries.is_a?(Array)

  entries.each do |entry|
    if %w[WordBoundary SentenceBoundary].include?(entry['Type'])
      payload = entry['Data']
      start = payload['Offset'] || 0
      length = payload['Duration'] || 0
      @state.last_duration_offset = start + length
    end
  end
rescue JSON::ParserError
  nil
end
325
+
326
# Sets the offset compensation to the last recorded end position plus a
# fixed 8_750_000 so that offsets of the next text part continue after the
# current one.
def update_offset_compensation
  @state.offset_compensation = @state.last_duration_offset + 8_750_000
end
330
+
331
# Emits a diagnostic message when verbose mode is enabled.
#
# Messages go to stderr: stdout is the audio channel when the CLI streams
# audio to standard output, so logging there would corrupt the audio.
def log(message)
  warn message if @verbose
end
334
+ end
335
+ end
336
+