youtube-rb 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +703 -0
- data/Rakefile +6 -0
- data/bin/console +10 -0
- data/bin/setup +8 -0
- data/lib/youtube-rb/client.rb +160 -0
- data/lib/youtube-rb/downloader.rb +632 -0
- data/lib/youtube-rb/extractor.rb +425 -0
- data/lib/youtube-rb/options.rb +186 -0
- data/lib/youtube-rb/version.rb +3 -0
- data/lib/youtube-rb/video_info.rb +179 -0
- data/lib/youtube-rb/ytdlp_wrapper.rb +269 -0
- data/lib/youtube-rb.rb +69 -0
- data/spec/client_spec.rb +514 -0
- data/spec/download_with_mocks_spec.rb +216 -0
- data/spec/downloader_spec.rb +774 -0
- data/spec/fixtures/first_video_info.json +19 -0
- data/spec/fixtures/rickroll_full_info.json +73 -0
- data/spec/fixtures/rickroll_info.json +73 -0
- data/spec/fixtures/rickroll_segment_info.json +9 -0
- data/spec/integration/ytdlp_integration_spec.rb +109 -0
- data/spec/real_download_spec.rb +175 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/support/fixtures_helper.rb +109 -0
- data/spec/support/mocking_helper.rb +21 -0
- data/spec/support/webmock_helper.rb +132 -0
- data/spec/youtube_rb_spec.rb +200 -0
- data/spec/ytdlp_wrapper_spec.rb +178 -0
- data/youtube-rb.gemspec +39 -0
- metadata +229 -0
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
require 'open3'
|
|
2
|
+
require 'fileutils'
|
|
3
|
+
require 'faraday'
|
|
4
|
+
|
|
5
|
+
module YoutubeRb
|
|
6
|
+
class Downloader
|
|
7
|
+
class DownloadError < StandardError; end
|
|
8
|
+
|
|
9
|
+
attr_reader :url, :options, :video_info
|
|
10
|
+
|
|
11
|
+
# Build a downloader for a single URL.
#
# @param url [String] address of the video to work with
# @param options [Options, Hash] an Options instance, or keyword-style
#   settings that are forwarded to Options.new
def initialize(url, options = Options.new)
  @url = url
  @options =
    case options
    when Options then options
    else Options.new(**options)
    end
  @extractor = Extractor.new(url, @options.to_h)

  # Backend bookkeeping: the wrapper is created on first use, and the two
  # flags record which backends have already been attempted.
  @ytdlp_wrapper = nil
  @video_info = nil
  @tried_ytdlp = false
  @tried_ruby = false

  # For caching full video when downloading multiple segments
  @cached_video_path = nil
end
|
|
21
|
+
|
|
22
|
+
# Download the full video.
#
# Ensures the output directory exists, then delegates to the yt-dlp backend
# when should_use_ytdlp? is truthy and to the pure Ruby backend otherwise.
#
# @return whatever the selected backend method returns
def download
  ensure_output_directory

  should_use_ytdlp? ? download_with_ytdlp : download_with_ruby
end
|
|
33
|
+
|
|
34
|
+
# Download one time range of the video.
#
# @param start_time [Numeric] segment start
# @param end_time [Numeric] segment end; must be greater than start_time
# @param output_file [String, nil] passed through to the yt-dlp backend
# @return whatever download_segment_with_ytdlp returns
# @raise [ArgumentError] when the range is empty/inverted or its length is
#   rejected by valid_segment_duration?
# @raise [DownloadError] when yt-dlp is not available
def download_segment(start_time, end_time, output_file = nil)
  raise ArgumentError, "Start time must be less than end time" if start_time >= end_time

  length = end_time - start_time
  if !valid_segment_duration?(length)
    raise ArgumentError, "Segment duration must be between #{@options.min_segment_duration} and #{@options.max_segment_duration} seconds, got: #{length}"
  end

  ensure_output_directory

  # Segment downloads are only implemented on top of yt-dlp.
  raise DownloadError, "yt-dlp is required for segment downloads. Please install yt-dlp." unless ytdlp_available?

  download_segment_with_ytdlp(start_time, end_time, output_file)
end
|
|
52
|
+
|
|
53
|
+
# Download multiple video segments (batch processing).
#
# Every segment is validated before any work starts, so a bad entry aborts
# the whole batch up front.
#
# @param segments [Array<Hash>] Array of segment definitions:
#   [{start: 0, end: 30, output_file: 'seg1.mp4'}, ...]
# @return [Array<String>] Paths to downloaded segment files
# @raise [ArgumentError] on a non-Array/empty argument or an invalid segment
# @raise [DownloadError] when yt-dlp is not available
def download_segments(segments)
  raise ArgumentError, "segments must be an Array" unless segments.is_a?(Array)
  raise ArgumentError, "segments array cannot be empty" if segments.empty?

  segments.each.with_index do |segment, position|
    well_formed = segment.is_a?(Hash) && segment[:start] && segment[:end]
    raise ArgumentError, "Segment #{position} must be a Hash with :start and :end keys" unless well_formed

    from = segment[:start]
    to = segment[:end]
    raise ArgumentError, "Segment #{position}: start time must be less than end time" if from >= to

    span = to - from
    next if valid_segment_duration?(span)

    raise ArgumentError, "Segment #{position}: duration must be between #{@options.min_segment_duration} and #{@options.max_segment_duration} seconds, got: #{span}"
  end

  ensure_output_directory

  # Batch segment downloads are only implemented on top of yt-dlp.
  raise DownloadError, "yt-dlp is required for batch segment downloads. Please install yt-dlp." unless ytdlp_available?

  download_segments_with_ytdlp(segments)
end
|
|
83
|
+
|
|
84
|
+
# Download only the subtitles.
#
# Always re-extracts the video info before downloading.
#
# @param langs [Array<String>, nil] languages to fetch; falls back to
#   @options.subtitle_langs when nil
def download_subtitles_only(langs = nil)
  ensure_output_directory
  @video_info = @extractor.extract_info

  download_subtitles(langs || @options.subtitle_langs)
end
|
|
92
|
+
|
|
93
|
+
# Get video information without downloading.
#
# The extractor is asked once; a truthy result is remembered and reused.
#
# @return the extractor's info object
def info
  return @video_info if @video_info

  @video_info = @extractor.extract_info
end
|
|
97
|
+
|
|
98
|
+
private
|
|
99
|
+
|
|
100
|
+
# Decide which backend #download should use.
#
# * use_ytdlp truthy and yt-dlp available -> true
# * use_ytdlp explicitly false            -> false
# * otherwise                             -> whatever ytdlp_available? says
def should_use_ytdlp?
  preference = @options.use_ytdlp

  return true if preference && ytdlp_available?
  return false if preference == false

  ytdlp_available?
end
|
|
116
|
+
|
|
117
|
+
# Whether the yt-dlp backend can be used, as reported by
# YtdlpWrapper.available?.
#
# The answer is cached after the first probe. A plain `||=` would only cache
# a truthy answer and re-probe on every call when yt-dlp is missing, so the
# cache is keyed on nil instead.
def ytdlp_available?
  @ytdlp_available = YtdlpWrapper.available? if @ytdlp_available.nil?
  @ytdlp_available
end
|
|
120
|
+
|
|
121
|
+
# Lazily build the YtdlpWrapper for the current options and reuse it.
def ytdlp_wrapper
  return @ytdlp_wrapper if @ytdlp_wrapper

  @ytdlp_wrapper = YtdlpWrapper.new(@options)
end
|
|
124
|
+
|
|
125
|
+
# Download the full video through the yt-dlp wrapper.
#
# Marks yt-dlp as attempted before starting. A YtdlpWrapper::YtdlpError is
# routed to handle_ytdlp_error, whose result (or exception) becomes this
# method's outcome.
#
# @return the path reported by the wrapper, or the error handler's result
def download_with_ytdlp
  log "Using yt-dlp backend for download"
  @tried_ytdlp = true

  begin
    ytdlp_wrapper.download(@url).tap do |path|
      log "Downloaded successfully with yt-dlp: #{path}"
    end
  rescue YtdlpWrapper::YtdlpError => e
    handle_ytdlp_error(e)
  end
end
|
|
137
|
+
|
|
138
|
+
# Download the full video with the pure Ruby backend.
#
# Extracts the video info, downloads either the audio or the video to the
# generated output path, then writes the optional extras that are switched on
# in the options (subtitles, info JSON, thumbnail, description). Any
# StandardError raised along the way is handed to handle_ruby_error.
#
# @return [String] the generated output path, or the error handler's result
def download_with_ruby
  log "Using pure Ruby backend for download"
  @tried_ruby = true

  begin
    @video_info = @extractor.extract_info
    target = generate_output_path(@video_info)

    @options.extract_audio ? download_audio(target) : download_video(target)

    wants_subtitles = @options.write_subtitles || @options.write_auto_sub
    download_subtitles if wants_subtitles
    download_metadata if @options.write_info_json
    download_thumbnail if @options.write_thumbnail
    download_description if @options.write_description

    target
  rescue => e
    handle_ruby_error(e)
  end
end
|
|
163
|
+
|
|
164
|
+
# Download a single segment through the yt-dlp wrapper.
#
# @return the path reported by the wrapper
def download_segment_with_ytdlp(start_time, end_time, output_file)
  log "Using yt-dlp backend for segment download"

  segment_path = ytdlp_wrapper.download_segment(@url, start_time, end_time, output_file)
  log "Downloaded segment successfully with yt-dlp: #{segment_path}"

  segment_path
end
|
|
171
|
+
|
|
172
|
+
# Produce several segments from a single full download.
#
# The full video is fetched once, then each segment is cut out of it with
# extract_segment. Matching subtitles are fetched per segment when either
# subtitle option is on. The cached full video is removed afterwards unless
# @options.cache_full_video is set -- also when an error interrupts the batch.
#
# @param segments [Array<Hash>] validated segment definitions
# @return [Array<String>] output path of every extracted segment, in order
def download_segments_with_ytdlp(segments)
  log "Using yt-dlp backend for batch segment download (optimized: 1 download + local segmentation)"

  begin
    # Download full video once using yt-dlp
    source_path = get_full_video_for_segmentation_with_ytdlp
    total = segments.size

    segments.each_with_index.map do |segment, position|
      from = segment[:start]
      to = segment[:end]
      target = segment[:output_file] || generate_segment_output_path(@video_info, from, to)

      log "Extracting segment #{position + 1}/#{total}: #{from}-#{to}s"
      extract_segment(source_path, target, from, to)

      wants_subtitles = @options.write_subtitles || @options.write_auto_sub
      download_subtitles_for_segment(from, to) if wants_subtitles

      target
    end
  ensure
    # Clean up cache if not enabled
    cleanup_video_cache unless @options.cache_full_video
  end
end
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# React to a failure of the yt-dlp backend.
#
# When fallback is enabled and the pure Ruby backend has not been tried yet,
# runs the given fallback callable -- or download_with_ruby when none was
# given -- and returns its result. Otherwise the failure is re-raised as a
# DownloadError.
#
# @param error [Exception] the yt-dlp failure
# @param fallback [#call, nil] optional replacement for download_with_ruby
# @raise [DownloadError] when no fallback is possible
def handle_ytdlp_error(error, fallback: nil)
  log "yt-dlp error: #{error.message}"

  if @options.ytdlp_fallback && !@tried_ruby
    log "Falling back to pure Ruby implementation"
    return fallback ? fallback.call : download_with_ruby
  end

  raise DownloadError, "yt-dlp failed: #{error.message}"
end
|
|
224
|
+
|
|
225
|
+
# React to a failure of the pure Ruby backend.
#
# Falls back to yt-dlp when all of the following hold:
#   1. ytdlp_fallback is enabled
#   2. yt-dlp is available
#   3. yt-dlp has not been tried yet
#   4. the error message contains '403' or the error is an
#      Extractor::ExtractionError
# Otherwise the failure is re-raised as a DownloadError.
#
# @param error [Exception] the failure raised by the Ruby backend
# @raise [DownloadError] when no fallback applies
def handle_ruby_error(error)
  log "Pure Ruby error: #{error.message}"

  may_fall_back = @options.ytdlp_fallback && ytdlp_available? && !@tried_ytdlp
  recoverable = may_fall_back &&
                (error.message.include?('403') || error.is_a?(Extractor::ExtractionError))

  if recoverable
    log "Falling back to yt-dlp"
    return download_with_ytdlp
  end

  raise DownloadError, "Download failed: #{error.message}"
end
|
|
242
|
+
|
|
243
|
+
# Print a prefixed progress message to stdout, but only in verbose mode.
def log(message)
  return unless @options.verbose

  puts "[YoutubeRb] #{message}"
end
|
|
246
|
+
|
|
247
|
+
# Fetch the video stream to +output_file+.
#
# Thin wrapper: the pure Ruby backend always downloads over plain HTTP.
def download_video(output_file)
  download_with_http(output_file)
end
|
|
251
|
+
|
|
252
|
+
# Download the video to a temporary file and extract its audio track.
#
# The audio is written next to +output_file+, with the extension replaced by
# @options.audio_format. The temporary video is removed in an ensure block so
# it does not linger when the download or the FFmpeg step raises.
#
# @param output_file [String] desired output path; only its extension-less
#   stem is used
def download_audio(output_file)
  base_output = output_file.sub(/\.[^.]+$/, '')
  temp_video = generate_temp_path

  begin
    # Download video first, then extract audio with FFmpeg
    download_with_http(temp_video)
    extract_audio(temp_video, "#{base_output}.#{@options.audio_format}")
  ensure
    File.delete(temp_video) if File.exist?(temp_video)
  end
end
|
|
261
|
+
|
|
262
|
+
# Stream the best available format of @video_info to +output_file+.
#
# The response body is written chunk by chunk through Faraday's on_data hook.
# If the transfer does not complete (non-success status, network error, any
# other exception), the partially written file is removed so that callers
# never find a truncated or error-body file at the output path.
#
# @param output_file [String] destination path (opened with mode 'wb')
# @raise [DownloadError] when no format/URL is available, on a non-success
#   HTTP status, or on a Faraday error
def download_with_http(output_file)
  # Named `selected` rather than `format` to avoid shadowing Kernel#format.
  selected = @video_info.best_format
  raise DownloadError, "No suitable format found" unless selected

  url = selected[:url]
  raise DownloadError, "No URL found in format" unless url

  puts "Downloading from: #{url[0..80]}..." if @options.respond_to?(:verbose) && @options.verbose

  downloaded = 0
  completed = false

  begin
    File.open(output_file, 'wb') do |file|
      response = http_client.get(url) do |req|
        req.options.on_data = proc do |chunk, overall_received_bytes|
          file.write(chunk)
          downloaded = overall_received_bytes
        end
      end

      unless response.success?
        raise DownloadError, "HTTP download failed with status #{response.status}"
      end
    end
    completed = true
  ensure
    # rm_f never raises for a missing file, so it cannot mask the real error.
    FileUtils.rm_f(output_file) unless completed
  end

  puts "Downloaded #{(downloaded / 1024.0 / 1024.0).round(2)} MB" if @options.respond_to?(:verbose) && @options.verbose
rescue Faraday::Error => e
  raise DownloadError, "Network error during download: #{e.message}"
end
|
|
290
|
+
|
|
291
|
+
# Download every subtitle track of @video_info for the requested languages.
#
# +langs+ may be an Array of language codes, a single code, or nil (in which
# case @options.subtitle_langs is used). It is coerced with Array(), so a
# bare String or a nil option no longer raises NoMethodError.
#
# @param langs [Array<String>, String, nil] languages to fetch
def download_subtitles(langs = nil)
  languages = Array(langs || @options.subtitle_langs)
  return if @video_info.subtitles.empty?

  languages.each do |lang|
    tracks = @video_info.get_subtitle(lang)
    next unless tracks

    tracks.each { |track| download_subtitle_file(track, lang) }
  end
end
|
|
304
|
+
|
|
305
|
+
# Download subtitles for the configured languages, trimmed to one segment.
#
# NOTE(review): every track of a language is written to the same generated
# path, so later tracks overwrite earlier ones -- confirm this is intended.
#
# @param start_time [Numeric] segment start
# @param end_time [Numeric] segment end
def download_subtitles_for_segment(start_time, end_time)
  languages = @options.subtitle_langs
  return if @video_info.subtitles.empty?

  languages.each do |lang|
    tracks = @video_info.get_subtitle(lang)
    next unless tracks

    tracks.each do |track|
      target = generate_subtitle_segment_path(lang, start_time, end_time)
      download_and_trim_subtitle(track, target, start_time, end_time)
    end
  end
end
|
|
319
|
+
|
|
320
|
+
# Download one subtitle track and store it under the generated subtitle path.
#
# Best effort: a non-success HTTP status or any StandardError is reported
# with `warn` and otherwise ignored. When the track's extension differs from
# @options.subtitle_format the stored file is passed to
# convert_subtitle_format.
#
# @param subtitle [Hash] track description; :url and :ext are read
# @param lang [String] language code, used for the file name and messages
def download_subtitle_file(subtitle, lang)
  destination = generate_subtitle_path(lang, subtitle[:ext])

  begin
    reply = http_client.get(subtitle[:url])

    if reply.success?
      File.write(destination, reply.body)

      # Convert to requested format if different
      wanted = @options.subtitle_format
      convert_subtitle_format(destination, wanted) if wanted != subtitle[:ext]
    else
      warn "Failed to download subtitle: HTTP #{reply.status}"
    end
  rescue => e
    warn "Failed to download subtitle for #{lang}: #{e.message}"
  end
end
|
|
341
|
+
|
|
342
|
+
# Download one subtitle track, trim it to the segment and write it out.
#
# A non-success HTTP status is reported with `warn` and nothing is written,
# mirroring download_subtitle_file; without this check an error page would
# be trimmed and stored as if it were subtitle text.
#
# @param subtitle [Hash] track description; :url and :ext are read
# @param output_file [String] destination path
# @param start_time [Numeric] segment start
# @param end_time [Numeric] segment end
def download_and_trim_subtitle(subtitle, output_file, start_time, end_time)
  response = http_client.get(subtitle[:url])

  unless response.success?
    warn "Failed to download subtitle: HTTP #{response.status}"
    return
  end

  # Parse and trim subtitle based on time range
  trimmed_content = trim_subtitle_content(response.body, start_time, end_time, subtitle[:ext])

  File.write(output_file, trimmed_content)
end
|
|
351
|
+
|
|
352
|
+
# Cut [start_time, end_time) out of +input_file+ into +output_file+ using
# FFmpeg stream copy (no re-encoding).
#
# The command is passed to Open3.capture3 as an argument vector, not as one
# joined string. No shell is involved, so file names containing spaces (which
# sanitized titles routinely do) or shell metacharacters reach FFmpeg intact.
#
# @param input_file [String] path of the full video
# @param output_file [String] path of the segment to create (overwritten)
# @param start_time [Numeric] segment start in seconds
# @param end_time [Numeric] segment end in seconds
# @raise [DownloadError] when FFmpeg is missing or exits unsuccessfully
def extract_segment(input_file, output_file, start_time, end_time)
  unless ffmpeg_available?
    raise DownloadError, "FFmpeg is required for segment extraction. Please install ffmpeg."
  end

  duration = end_time - start_time

  cmd = [
    'ffmpeg',
    '-y', # global option: overwrite the output; placed before the files
    '-i', input_file.to_s,
    '-ss', start_time.to_s,
    '-t', duration.to_s,
    '-c', 'copy',
    '-avoid_negative_ts', '1',
    output_file.to_s
  ]

  _stdout, stderr, status = Open3.capture3(*cmd)

  raise DownloadError, "Segment extraction failed: #{stderr}" unless status.success?
end
|
|
376
|
+
|
|
377
|
+
# Extract the audio track of +input_file+ into +output_file+ with FFmpeg.
#
# Codec and bitrate come from @options.audio_format / @options.audio_quality.
# The command is passed to Open3.capture3 as an argument vector, so no shell
# is involved and paths containing spaces or shell metacharacters are safe.
#
# @param input_file [String] path of the downloaded video
# @param output_file [String] path of the audio file to create (overwritten)
# @raise [DownloadError] when FFmpeg is missing or exits unsuccessfully
def extract_audio(input_file, output_file)
  unless ffmpeg_available?
    raise DownloadError, "FFmpeg is required for audio extraction. Please install ffmpeg."
  end

  cmd = [
    'ffmpeg',
    '-y', # global option: overwrite the output; placed before the files
    '-i', input_file.to_s,
    '-vn',
    '-acodec', audio_codec_for_format(@options.audio_format),
    '-ab', "#{@options.audio_quality}k",
    output_file.to_s
  ]

  _stdout, stderr, status = Open3.capture3(*cmd)

  raise DownloadError, "Audio extraction failed: #{stderr}" unless status.success?
end
|
|
398
|
+
|
|
399
|
+
# Trim subtitle text to a time range when the format is supported.
#
# Only 'vtt' and 'srt' are trimmed; content in any other format is returned
# unchanged.
def trim_subtitle_content(content, start_time, end_time, format)
  return content unless %w[vtt srt].include?(format)

  trim_vtt_or_srt(content, start_time, end_time)
end
|
|
407
|
+
|
|
408
|
+
# Keep only the cues that overlap [start_time, end_time] and shift their
# timestamps so the segment starts at zero.
#
# Lines are scanned one by one. A timestamp line opens a cue (kept when it
# overlaps the range, dropped otherwise); following lines are collected until
# a blank line closes the cue. WEBVTT/Kind:/Language: header lines seen
# outside a cue are copied through.
#
# The last cue is flushed explicitly after the loop. String#split drops
# trailing empty strings, so the blank line that would normally close the
# final cue is never seen and that cue used to be silently lost.
#
# @param content [String] VTT or SRT text
# @param start_time [Numeric] segment start in seconds
# @param end_time [Numeric] segment end in seconds
# @return [String] the trimmed subtitle text
def trim_vtt_or_srt(content, start_time, end_time)
  result = []
  current_block = []
  in_cue = false

  content.split("\n").each do |line|
    match_data = line.match(/(\d{2}:\d{2}:\d{2}[.,]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[.,]\d{3})/)

    if match_data
      # This is a timestamp line
      cue_start = parse_subtitle_time(match_data[1])
      cue_end = parse_subtitle_time(match_data[2])

      if cue_end >= start_time && cue_start <= end_time
        # Adjust timestamps relative to segment start, clamped to the segment
        adjusted_start = [cue_start - start_time, 0].max
        adjusted_end = [cue_end - start_time, end_time - start_time].min

        current_block << "#{format_subtitle_time(adjusted_start)} --> #{format_subtitle_time(adjusted_end)}"
        in_cue = true
      else
        in_cue = false
        current_block = []
      end
    elsif in_cue
      current_block << line
      if line.strip.empty? && current_block.size > 1
        result.concat(current_block)
        current_block = []
      end
    elsif line.start_with?('WEBVTT', 'Kind:', 'Language:')
      result << line
    end
  end

  # Flush a cue that reached end-of-input without a closing blank line.
  result.concat(current_block) if current_block.size > 1

  result.join("\n")
end
|
|
444
|
+
|
|
445
|
+
# Convert a subtitle timestamp to seconds.
#
# Accepts "HH:MM:SS.mmm" as well as the comma variant "HH:MM:SS,mmm". The
# fractional part is read as an integer number of milliseconds.
#
# @param time_str [String] timestamp, e.g. "00:00:10.500" or "00:00:10,500"
# @return [Float] seconds
def parse_subtitle_time(time_str)
  hour_part, minute_part, second_part = time_str.tr(',', '.').split(':')
  whole, fraction = second_part.split('.')

  hour_part.to_i * 3600 + minute_part.to_i * 60 + whole.to_i + fraction.to_i / 1000.0
end
|
|
456
|
+
|
|
457
|
+
# Render a number of seconds as "HH:MM:SS.mmm".
#
# The value is first rounded to whole milliseconds and then split with
# integer arithmetic. Truncating `(seconds % 1) * 1000` instead loses a
# millisecond whenever the float sits just below the intended value
# (e.g. 1.001 would come out as ".000").
#
# @param seconds [Numeric] non-negative time offset in seconds
# @return [String] formatted timestamp
def format_subtitle_time(seconds)
  total_millis = (seconds * 1000).round

  total_secs, millis = total_millis.divmod(1000)
  total_mins, secs = total_secs.divmod(60)
  hours, minutes = total_mins.divmod(60)

  format("%02d:%02d:%02d.%03d", hours, minutes, secs, millis)
end
|
|
465
|
+
|
|
466
|
+
# Write @video_info as pretty-printed JSON to the generated metadata path.
def download_metadata
  target = generate_metadata_path
  payload = JSON.pretty_generate(@video_info.to_h)

  File.write(target, payload)
end
|
|
470
|
+
|
|
471
|
+
# Download the video's thumbnail image to the generated thumbnail path.
#
# Does nothing when the video has no thumbnail URL. A non-success HTTP status
# is reported with `warn` and nothing is written, so an error page is never
# saved as an image. The body is written in binary mode because it is image
# data, not text.
def download_thumbnail
  thumbnail_url = @video_info.thumbnail
  return unless thumbnail_url

  output_file = generate_thumbnail_path
  response = http_client.get(thumbnail_url)

  unless response.success?
    warn "Failed to download thumbnail: HTTP #{response.status}"
    return
  end

  File.binwrite(output_file, response.body)
end
|
|
478
|
+
|
|
479
|
+
# Write the video description to the generated description path.
# Does nothing when the video has no description.
def download_description
  text = @video_info.description
  return unless text

  File.write(generate_description_path, text)
end
|
|
485
|
+
|
|
486
|
+
# "Convert" a subtitle file to another format.
#
# Basic support only: the file is renamed so that its extension matches
# +target_format+; the content is left untouched.
# TODO: Add proper conversion logic for different formats
#
# @param input_file [String] path of the existing subtitle file
# @param target_format [String] wanted extension, without the leading dot
def convert_subtitle_format(input_file, target_format)
  wanted_ext = ".#{target_format}"
  return if File.extname(input_file) == wanted_ext

  renamed = input_file.sub(/\.[^.]+$/, wanted_ext)
  FileUtils.mv(input_file, renamed)
end
|
|
496
|
+
|
|
497
|
+
# Map an audio container/format name to the FFmpeg codec name used for it.
# Unknown formats map to 'copy'.
#
# @param format [String] e.g. 'mp3', 'm4a', 'ogg'
# @return [String] value for FFmpeg's -acodec option
def audio_codec_for_format(format)
  codecs = {
    'mp3' => 'libmp3lame',
    'aac' => 'aac',
    'm4a' => 'aac',
    'opus' => 'libopus',
    'vorbis' => 'libvorbis',
    'ogg' => 'libvorbis',
    'flac' => 'flac',
    'wav' => 'pcm_s16le'
  }

  codecs.fetch(format, 'copy')
end
|
|
515
|
+
|
|
516
|
+
# Build the output path for a full download from @options.output_template.
#
# Supported placeholders: %(title)s, %(id)s, %(ext)s, %(uploader)s.
# All placeholders are expanded in a single pass with a block replacement:
# * text inside a substituted value (e.g. a title that literally contains
#   "%(id)s") is not expanded a second time, and
# * backslashes in values are never interpreted as gsub back-references.
# A missing id is rendered as an empty string instead of raising.
#
# @param video_info [#title, #id, #ext, #uploader] extracted video info
# @return [String] path inside @options.output_path
def generate_output_path(video_info)
  extension = @options.extract_audio ? @options.audio_format : (video_info.ext || 'mp4')

  values = {
    'title' => sanitize_filename(video_info.title),
    'id' => video_info.id.to_s,
    'ext' => extension.to_s,
    'uploader' => sanitize_filename(video_info.uploader || 'unknown')
  }

  filename = @options.output_template.gsub(/%\((title|id|ext|uploader)\)s/) do
    values[Regexp.last_match(1)]
  end

  File.join(@options.output_path, filename)
end
|
|
528
|
+
|
|
529
|
+
# Build the default output path of one extracted segment:
# "<title>-<id>-segment-<start>-<end>.<ext>" inside @options.output_path,
# with 'mp4' used when the video info carries no extension.
def generate_segment_output_path(video_info, start_time, end_time)
  stem = [sanitize_filename(video_info.title), video_info.id].join('-')
  extension = video_info.ext || 'mp4'

  File.join(@options.output_path, "#{stem}-segment-#{start_time}-#{end_time}.#{extension}")
end
|
|
533
|
+
|
|
534
|
+
# Build the path of a subtitle file: "<title>-<id>.<lang>.<ext>" inside
# @options.output_path.
def generate_subtitle_path(lang, ext)
  stem = [sanitize_filename(@video_info.title), @video_info.id].join('-')

  File.join(@options.output_path, "#{stem}.#{lang}.#{ext}")
end
|
|
538
|
+
|
|
539
|
+
# Build the path of a per-segment subtitle file:
# "<title>-<id>-segment-<start>-<end>.<lang>.<subtitle_format>" inside
# @options.output_path.
def generate_subtitle_segment_path(lang, start_time, end_time)
  stem = [sanitize_filename(@video_info.title), @video_info.id].join('-')
  suffix = "segment-#{start_time}-#{end_time}.#{lang}.#{@options.subtitle_format}"

  File.join(@options.output_path, "#{stem}-#{suffix}")
end
|
|
543
|
+
|
|
544
|
+
# Build the path of the metadata file: "<title>-<id>.info.json" inside
# @options.output_path.
def generate_metadata_path
  stem = [sanitize_filename(@video_info.title), @video_info.id].join('-')

  File.join(@options.output_path, "#{stem}.info.json")
end
|
|
548
|
+
|
|
549
|
+
# Build the path of the thumbnail file: "<title>-<id><ext>" inside
# @options.output_path.
#
# The extension is taken from the thumbnail URL after its query string and
# fragment have been cut off. Taking File.extname of the full URL picks up
# dots or slashes inside the query (e.g. "?v=1.2"). '.jpg' is used when the
# URL path has no extension.
def generate_thumbnail_path
  url_path = @video_info.thumbnail.to_s.split(/[?#]/, 2).first.to_s

  ext = File.extname(url_path)
  ext = '.jpg' if ext.empty?

  filename = "#{sanitize_filename(@video_info.title)}-#{@video_info.id}#{ext}"
  File.join(@options.output_path, filename)
end
|
|
554
|
+
|
|
555
|
+
# Build the path of the description file: "<title>-<id>.description" inside
# @options.output_path.
def generate_description_path
  stem = [sanitize_filename(@video_info.title), @video_info.id].join('-')

  File.join(@options.output_path, "#{stem}.description")
end
|
|
559
|
+
|
|
560
|
+
# Build a hidden, throw-away ".temp_<epoch>_<random>.mp4" path inside
# @options.output_path.
def generate_temp_path
  stamp = Time.now.to_i
  suffix = rand(1000)

  File.join(@options.output_path, ".temp_#{stamp}_#{suffix}.mp4")
end
|
|
563
|
+
|
|
564
|
+
# Turn an arbitrary title into a string that is safe to use as a file name.
#
# Characters that are invalid on common file systems (/ \ : * ? " < > |) are
# replaced by '_', whitespace runs are collapsed to one space and the result
# is stripped. The input is converted with #to_s first, so non-String values
# no longer raise. 'video' is returned whenever the cleaned name is empty,
# which covers nil, "" and whitespace-only input.
#
# @param filename [#to_s, nil] raw name
# @return [String] sanitized, never-empty name
def sanitize_filename(filename)
  cleaned = filename.to_s
                    .gsub(/[\/\\:*?"<>|]/, '_')
                    .gsub(/\s+/, ' ')
                    .strip

  cleaned.empty? ? 'video' : cleaned
end
|
|
572
|
+
|
|
573
|
+
# Create @options.output_path (including parents) when it does not exist yet.
def ensure_output_directory
  target_dir = @options.output_path
  return if Dir.exist?(target_dir)

  FileUtils.mkdir_p(target_dir)
end
|
|
576
|
+
|
|
577
|
+
# Whether +duration+ lies within the inclusive range
# [@options.min_segment_duration, @options.max_segment_duration].
def valid_segment_duration?(duration)
  return false unless duration >= @options.min_segment_duration

  duration <= @options.max_segment_duration
end
|
|
580
|
+
|
|
581
|
+
# Return the path of the full video that segments are cut from.
#
# A previously downloaded file is reused when it still exists. Otherwise the
# video info is extracted if it has not been yet (it is needed for segment
# naming), a fresh cache path is generated and the video is downloaded to it
# via yt-dlp.
#
# @return [String] path stored in @cached_video_path
def get_full_video_for_segmentation_with_ytdlp
  existing = @cached_video_path
  if existing && File.exist?(existing)
    log "Using cached video: #{existing}"
    return existing
  end

  @video_info ||= @extractor.extract_info

  @cached_video_path = generate_cache_path
  log "Downloading full video via yt-dlp for segmentation: #{@cached_video_path}"
  ytdlp_wrapper.download(@url, @cached_video_path)

  @cached_video_path
end
|
|
598
|
+
|
|
599
|
+
# Delete the cached full video, if one exists on disk, and forget its path.
def cleanup_video_cache
  cached = @cached_video_path
  return unless cached && File.exist?(cached)

  log "Cleaning up cached video: #{cached}"
  File.delete(cached)
  @cached_video_path = nil
end
|
|
606
|
+
|
|
607
|
+
# Build a hidden ".cache_<epoch>_<random>.mp4" path inside
# @options.output_path for the full video used during segmentation.
def generate_cache_path
  stamp = Time.now.to_i
  suffix = rand(10000)

  File.join(@options.output_path, ".cache_#{stamp}_#{suffix}.mp4")
end
|
|
610
|
+
|
|
611
|
+
# Whether an executable named "ffmpeg" can be found on the PATH.
#
# The PATH is scanned directly instead of shelling out to `which`, which
# avoids spawning a shell for every check and does not depend on `which` or
# /dev/null existing. On platforms that define PATHEXT (Windows) the listed
# extensions are tried as well.
#
# @return [Boolean]
def ffmpeg_available?
  extensions = [''] + ENV['PATHEXT'].to_s.split(';')

  ENV['PATH'].to_s.split(File::PATH_SEPARATOR).any? do |dir|
    next false if dir.empty?

    extensions.any? do |ext|
      candidate = File.join(dir, "ffmpeg#{ext}")
      File.file?(candidate) && File.executable?(candidate)
    end
  end
end
|
|
614
|
+
|
|
615
|
+
# Shared Faraday connection used for all HTTP requests of this downloader.
#
# Built on first use and reused afterwards. It is configured with the retry
# middleware (max taken from @options.retries), generous timeouts and
# browser-like default headers.
#
# NOTE(review): when continue_download is set, the 'Range: bytes=0-' header
# is a connection-wide default, so it is sent with every request made through
# this client -- confirm that is intended.
def http_client
  return @http_client if @http_client

  @http_client = Faraday.new do |conn|
    conn.request :retry, max: @options.retries, interval: 0.5, backoff_factor: 2
    conn.adapter Faraday.default_adapter

    conn.options.timeout = 600 # 10 minutes for large downloads
    conn.options.open_timeout = 30

    headers = conn.headers
    headers['User-Agent'] = @options.user_agent || 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    headers['Accept'] = '*/*'
    headers['Accept-Language'] = 'en-US,en;q=0.9'
    headers['Referer'] = @options.referer if @options.referer

    # Add range support for resuming downloads
    headers['Range'] = 'bytes=0-' if @options.continue_download
  end
end
|
|
631
|
+
end
|
|
632
|
+
end
|