rubytube 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,165 @@
1
module RubyTube
  # Represents one YouTube video. The video id is extracted from the URL given
  # to the constructor; the watch page, player JavaScript and InnerTube player
  # response are fetched lazily and memoized per instance.
  class Client
    attr_accessor :video_id, :watch_url, :embed_url, :stream_monostate

    # @param url [String] URL passed to Extractor.video_id to obtain the id
    def initialize(url)
      self.video_id = Extractor.video_id(url)

      self.watch_url = "https://youtube.com/watch?v=#{video_id}"
      self.embed_url = "https://www.youtube.com/embed/#{video_id}"

      self.stream_monostate = Monostate.new
    end

    # @return [String] body of the watch page (fetched once, then cached)
    def watch_html
      @watch_html ||= Request.get(watch_url)
    end

    # @return [String] body of the player JavaScript located at #js_url
    def js
      @js ||= Request.get(js_url)
    end

    # @return [String] URL of the player JavaScript, derived from #watch_html
    def js_url
      @js_url ||= Extractor.js_url(watch_html)
    end

    # Returns the 'streamingData' entry of the player response. When the
    # default response does not contain it, the response is replaced with the
    # one obtained through #bypass_age_gate first.
    def streaming_data
      bypass_age_gate unless vid_info && vid_info.key?('streamingData')
      vid_info['streamingData']
    end

    # Builds (once) the list of Stream objects for this video.
    #
    # @return [Array<Stream>]
    # @raise whatever #check_availability raises for unavailable videos
    def fmt_streams
      check_availability
      return @fmt_streams if @fmt_streams

      stream_manifest = Extractor.apply_descrambler(streaming_data)

      begin
        Extractor.apply_signature(stream_manifest, vid_info, js)
      rescue ExtractError
        # Drop the memoized player JS and its URL so the `js` call below
        # fetches a fresh copy. Assigning to bare `js`/`js_url` here would only
        # create locals and pass nil to the retry.
        @js = nil
        @js_url = nil
        Extractor.apply_signature(stream_manifest, vid_info, js)
      end

      streams = stream_manifest.map { |stream| Stream.new(stream, stream_monostate) }

      stream_monostate.title = title
      stream_monostate.duration = length

      # Memoize only after everything succeeded, so a failed attempt does not
      # leave an empty list cached for later calls.
      @fmt_streams = streams
    end

    # Inspects the playability status embedded in the watch page and raises
    # the matching error when the video cannot be played.
    #
    # @raise [MembersOnly, RecordingUnavailable, VideoUnavailable,
    #   VideoPrivate, LiveStreamError]
    def check_availability
      status, messages = Extractor.playability_status(watch_html)

      # Kernel#Array tolerates a single String as well as an Array of reasons.
      Array(messages).each do |reason|
        case status
        when 'UNPLAYABLE'
          case reason
          when 'Join this channel to get access to members-only content like this video, and other exclusive perks.'
            raise MembersOnly.new(video_id)
          when 'This live stream recording is not available.'
            raise RecordingUnavailable.new(video_id)
          else
            raise VideoUnavailable.new(video_id)
          end
        when 'LOGIN_REQUIRED'
          if reason == 'This is a private video. Please sign in to verify that you may see it.'
            raise VideoPrivate.new(video_id)
          end
        when 'ERROR'
          raise VideoUnavailable.new(video_id) if reason == 'Video unavailable'
        when 'LIVE_STREAM'
          raise LiveStreamError.new(video_id)
        end
      end
    end

    # @return [StreamQuery] query object wrapping #fmt_streams (cached)
    def streams
      return @streams if @streams

      check_availability
      @streams = StreamQuery.new(fmt_streams)
    end

    # @return [Hash] InnerTube player response for this video (cached)
    def vid_info
      @vid_info ||= InnerTube.new.player(video_id)
    end

    # Re-requests the player response with the 'ANDROID_EMBED' client and
    # stores it as the new #vid_info.
    #
    # @raise [VideoUnavailable] when that response reports status 'UNPLAYABLE'
    def bypass_age_gate
      response = InnerTube.new(client: 'ANDROID_EMBED').player(video_id)

      raise VideoUnavailable.new(video_id) if response.dig('playabilityStatus', 'status') == 'UNPLAYABLE'

      @vid_info = response
    end

    def title
      @title ||= video_detail('title')
    end

    # @return [Integer] value of 'lengthSeconds' converted with #to_i
    def length
      @length ||= video_detail('lengthSeconds').to_i
    end

    # @return [Integer] value of 'viewCount' converted with #to_i
    def views
      @views ||= video_detail('viewCount').to_i
    end

    def author
      @author ||= video_detail('author')
    end

    def keywords
      @keywords ||= video_detail('keywords')
    end

    def channel_id
      @channel_id ||= video_detail('channelId')
    end

    private

    # Reads one entry of vid_info['videoDetails']; nil when it is absent.
    def video_detail(key)
      vid_info.dig('videoDetails', key)
    end
  end
end
@@ -0,0 +1,177 @@
1
module RubyTube
  # Stateless helpers that pull ids, URLs, playability information and stream
  # descriptions out of YouTube watch pages and player responses.
  class Extractor
    class << self
      # Reads the playability status from the player response embedded in the
      # watch page.
      #
      # @param watch_html [String]
      # @return [Array(String, Array)] status and a list of reasons/messages;
      #   [nil, [nil]] when no status with a reason or messages is present
      def playability_status(watch_html)
        player_response = JSON.parse(initial_player_response(watch_html))
        status_obj = player_response['playabilityStatus'] || {}

        # The reasons are always an Array so callers can iterate them uniformly.
        return ['LIVE_STREAM', ['Video is a live stream.']] if status_obj.key?('liveStreamability')

        if status_obj.key?('status')
          return [status_obj['status'], [status_obj['reason']]] if status_obj.key?('reason')
          return [status_obj['status'], status_obj['messages']] if status_obj.key?('messages')
        end

        [nil, [nil]]
      end

      # @param url [String]
      # @return the first capture of the 11-character id pattern, as produced
      #   by Utils.regex_search
      def video_id(url)
        Utils.regex_search(/(?:v=|\/)([0-9A-Za-z_-]{11}).*/, url, 1)
      end

      # Builds the absolute URL of the player JavaScript. The path is taken
      # from the player config's assets.js entry when available, otherwise it
      # is searched for directly in the HTML.
      #
      # @param html [String] watch page HTML
      # @return [String]
      def js_url(html)
        "https://youtube.com#{config_js_path(html) || get_ytplayer_js(html)}"
      end

      # Splits a mime type declaration into the type and its codec list.
      #
      # @return [Array(String, Array<String>)]
      # @raise [RegexMatchError] when the input does not match the pattern
      def mime_type_codec(mime_type_codec)
        pattern = %r{(\w+\/\w+)\;\scodecs=\"([a-zA-Z\-0-9.,\s]*)\"}
        results = mime_type_codec.match(pattern)

        raise RegexMatchError.new("mime_type_codec, pattern=#{pattern}") if results.nil?

        mime_type, codecs = results.captures
        [mime_type, codecs.split(",").map(&:strip)]
      end

      # @param html [String]
      # @return [String] path of base.js found in the HTML
      # @raise [RegexMatchError] when no pattern matches
      def get_ytplayer_js(html)
        js_url_patterns = [
          %r{(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)},
        ]

        js_url_patterns.each do |pattern|
          function_match = html.match(pattern)
          return function_match[1] if function_match
        end

        raise RegexMatchError.new('get_ytplayer_js', 'js_url_patterns')
      end

      # Returns the first player config object found in the HTML, exactly as
      # Parser.parse_for_object returns it.
      #
      # @raise [RegexMatchError] when none of the patterns yields an object
      def get_ytplayer_config(html)
        # Same probing order as before: the two config patterns first, then
        # the yt.setConfig variant.
        patterns = [
          /ytplayer\.config\s*=\s*/,
          /ytInitialPlayerResponse\s*=\s*/,
          /yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*/
        ]

        config = first_parsed_object(html, patterns)
        return config unless config.nil?

        raise RegexMatchError.new('get_ytplayer_config', 'config_patterns, setconfig_patterns')
      end

      # Rewrites, in place, the 'url' of every stream that still needs a
      # deciphered signature (and throttling parameter).
      #
      # @param stream_manifest [Array<Hash>]
      # @param vid_info [Hash] player response, consulted for liveStreamability
      # @param js [String] player JavaScript handed to Cipher.new
      # @return [Array<Hash>] the same manifest
      # @raise [LiveStreamError] when a stream has no URL and the video is live
      def apply_signature(stream_manifest, vid_info, js)
        cipher = Cipher.new(js)

        stream_manifest.each do |stream|
          # Hash#[] returns nil for a missing key instead of raising, so the
          # absence of a URL has to be checked explicitly.
          url = stream['url'] if stream
          if url.nil?
            raise LiveStreamError.new('UNKNOWN') if vid_info.fetch('playabilityStatus', {})['liveStreamability']

            next # nothing to sign for an entry without a URL
          end

          # For certain videos, YouTube will just provide them pre-signed, in
          # which case there's no real magic to download them and we can skip
          # the whole signature descrambling entirely.
          next if presigned?(stream, url)

          stream['url'] = sign_url(url, stream['s'], cipher)
        end
      end

      # Merges 'formats' and 'adaptiveFormats' into one list and fills in
      # 'url' / 's' from 'signatureCipher' where a format has no 'url'.
      # Also tags every format with a boolean 'is_otf'.
      #
      # @return [Array<Hash>, nil] nil when stream_data itself has a 'url' key
      def apply_descrambler(stream_data)
        return if stream_data.key?('url')

        formats = []
        formats += stream_data['formats'] if stream_data.key?('formats')
        formats += stream_data['adaptiveFormats'] if stream_data.key?('adaptiveFormats')

        formats.each do |data|
          if !data.key?('url') && data.key?('signatureCipher')
            cipher_url = URI.decode_www_form(data['signatureCipher']).to_h
            data['url'] = cipher_url['url']
            data['s'] = cipher_url['s']
          end
          data['is_otf'] = data['type'] == 'FORMAT_STREAM_TYPE_OTF'
        end

        formats
      end

      private

      # Locates the ytInitialPlayerResponse object in the watch page.
      #
      # @raise [RegexMatchError] when neither pattern yields an object
      def initial_player_response(watch_html)
        patterns = [
          "window\\[['\"]ytInitialPlayerResponse['\"]\\]\\s*=\\s*",
          "ytInitialPlayerResponse\\s*=\\s*"
        ]

        response = first_parsed_object(watch_html, patterns)
        return response unless response.nil?

        raise RegexMatchError.new('initial_player_response', 'initial_player_response_pattern')
      end

      # Tries each pattern in order and returns the first object the parser
      # finds; nil when every pattern raises HTMLParseError.
      def first_parsed_object(html, patterns)
        patterns.each do |pattern|
          begin
            return Parser.parse_for_object(html, pattern)
          rescue HTMLParseError
            next
          end
        end

        nil
      end

      # Reads assets.js from the player config; nil when the config cannot be
      # located or decoded or has no such entry. The config may arrive as raw
      # text, and indexing a String with ['assets'] is a substring lookup, so
      # it is decoded as JSON first.
      def config_js_path(html)
        config = get_ytplayer_config(html)
        config = JSON.parse(config) if config.is_a?(String)
        config.dig('assets', 'js') if config.is_a?(Hash)
      rescue RegexMatchError, NoMethodError, TypeError, JSON::ParserError
        nil
      end

      # True when the URL already carries a signature and needs no deciphering.
      def presigned?(stream, url)
        url.include?("signature") ||
          (!stream.key?("s") && (url.include?("&sig=") || url.include?("&lsig=")))
      end

      # Returns the URL with the deciphered 'sig' and, unless 'ratebypass' is
      # present, the recalculated 'n' parameter.
      def sign_url(url, scrambled_signature, cipher)
        signature = cipher.get_signature(scrambled_signature)
        parsed_url = URI.parse(url)

        query_params = CGI.parse(parsed_url.query.to_s).transform_values(&:first)
        query_params['sig'] = signature
        # Only recalculate 'n' when the URL actually has one.
        if !query_params.key?('ratebypass') && query_params.key?('n')
          query_params['n'] = cipher.calculate_n(query_params['n'].split(''))
        end

        "#{parsed_url.scheme}://#{parsed_url.host}#{parsed_url.path}?#{URI.encode_www_form(query_params)}"
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
module RubyTube
  # Minimal client for the YouTube InnerTube API. Each named client profile
  # supplies the request context, extra headers and API key.
  class InnerTube
    DEFAULT_CLIENTS = {
      'WEB' => {
        context: {
          client: {
            clientName: 'WEB',
            clientVersion: '2.20200720.00.02'
          }
        },
        header: { 'User-Agent': 'Mozilla/5.0' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
      },
      'ANDROID_MUSIC' => {
        context: {
          client: {
            clientName: 'ANDROID_MUSIC',
            clientVersion: '5.16.51',
            androidSdkVersion: 30,
          },
        },
        header: { 'User-Agent': 'com.google.android.apps.youtube.music/'},
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
      },
      'ANDROID_EMBED' => {
        context: {
          client: {
            clientName: 'ANDROID_EMBEDDED_PLAYER',
            clientVersion: '17.31.35',
            clientScreen: 'EMBED',
            androidSdkVersion: 30,
          }
        },
        header: { 'User-Agent': 'com.google.android.youtube/' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      },
    }

    # Original (misspelled) constant name, kept so existing references work.
    DEFALUT_CLIENTS = DEFAULT_CLIENTS

    BASE_URL = 'https://www.youtube.com/youtubei/v1'

    attr_accessor :context, :header, :api_key, :access_token, :refresh_token, :use_oauth, :allow_cache, :expires

    # @param client [String] key into DEFAULT_CLIENTS
    # @param use_oauth [Boolean] send a bearer token instead of the API key
    # @param allow_cache [Boolean] stored for #cache_tokens
    # @raise [ArgumentError] when the client name is not a known profile
    def initialize(client: 'ANDROID_MUSIC', use_oauth: false, allow_cache: false)
      profile = DEFAULT_CLIENTS.fetch(client) do
        raise ArgumentError,
              "unknown InnerTube client #{client.inspect} (known: #{DEFAULT_CLIENTS.keys.join(', ')})"
      end

      self.context = profile[:context]
      self.header = profile[:header]
      self.api_key = profile[:api_key]
      self.use_oauth = use_oauth
      self.allow_cache = allow_cache
    end

    def cache_tokens
      return unless allow_cache

      # TODO:
    end

    def refresh_bearer_token(force: false)
      # TODO:
    end

    def fetch_bearer_token
      # TODO:
    end

    # POSTs a JSON payload to an InnerTube endpoint and returns the decoded
    # response.
    #
    # NOTE(review): this method shadows Object#send for InnerTube instances;
    # the name is kept because callers may rely on it.
    #
    # @param endpoint [String] full endpoint URL
    # @param query [Hash] extra query parameters (not modified)
    # @param data [Hash] request payload
    # @return [Hash, Array] result of JSON.parse on the response body
    def send(endpoint, query, data)
      # Symbol keys are kept so Request.send's own symbol-keyed default
      # Content-Type is overridden rather than duplicated.
      headers = {
        'Content-Type': 'application/json',
      }

      if use_oauth
        access_token ? refresh_bearer_token : fetch_bearer_token
        headers['Authorization'] = "Bearer #{access_token}"
      end

      params = {
        key: api_key,
        contentCheckOk: true,
        racyCheckOk: true,
      }.merge(query)

      # With OAuth the API key must not be sent. It is removed after merging,
      # otherwise the default added above would put it straight back.
      if use_oauth
        params.delete(:key)
        params.delete('key')
      end

      options = {
        headers: headers.merge(header),
        query: params,
        data: data
      }

      JSON.parse(Request.post(endpoint, options))
    end

    # Requests the player response for a video.
    #
    # @param video_id [String]
    # @return [Hash, Array] decoded player response
    def player(video_id)
      send("#{BASE_URL}/player", { 'videoId' => video_id }, { context: context })
    end
  end
end
@@ -0,0 +1,5 @@
1
module RubyTube
  # Plain holder for the video's title and duration. Both values start out
  # as nil and are freely readable and writable.
  class Monostate
    attr_accessor :title
    attr_accessor :duration
  end
end
@@ -0,0 +1,164 @@
1
+ module RubyTube
2
+ module Parser
3
+ module_function
4
+
5
+ def parse_for_object(html, preceding_regex)
6
+ regex = Regexp.new(preceding_regex)
7
+ result = regex.match(html)
8
+
9
+ if result.nil?
10
+ raise HTMLParseError, "No matches for regex #{preceding_regex}"
11
+ end
12
+ start_index = result.end(0)
13
+
14
+ return parse_for_object_from_startpoint(html, start_index)
15
+ end
16
+
17
+ def find_object_from_startpoint(html, start_point)
18
+ html = html[start_point..-1]
19
+ unless ['{', '['].include?(html[0])
20
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
21
+ end
22
+
23
+ last_char = '{'
24
+ curr_char = nil
25
+ stack = [html[0]]
26
+ i = 1
27
+
28
+ context_closers = {
29
+ '{' => '}',
30
+ '[' => ']',
31
+ '"' => '"',
32
+ '/' => '/',
33
+ }
34
+
35
+ while i < html.length
36
+ break if stack.empty?
37
+
38
+ last_char = curr_char unless [' ', '\n'].include?(curr_char)
39
+ curr_char = html[i]
40
+ curr_context = stack.last
41
+
42
+ if curr_char == context_closers[curr_context]
43
+ stack.pop
44
+ i += 1
45
+ next
46
+ end
47
+
48
+ if ['"', '/'].include?(curr_context)
49
+ if curr_char == '\\'
50
+ i += 2
51
+ next
52
+ end
53
+ else
54
+ if context_closers.keys.include?(curr_char)
55
+ unless curr_char == '/' && !['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
56
+ stack.push(curr_char)
57
+ end
58
+ end
59
+ end
60
+
61
+ i += 1
62
+ end
63
+
64
+ full_obj = html[0...i]
65
+ full_obj
66
+ end
67
+
68
+ def parse_for_object_from_startpoint(html, start_point)
69
+ html = html[start_point..-1]
70
+
71
+ unless ['{', '['].include?(html[0])
72
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
73
+ end
74
+
75
+ # First letter MUST be an open brace, so we put that in the stack,
76
+ # and skip the first character.
77
+ last_char = '{'
78
+ curr_char = nil
79
+ stack = [html[0]]
80
+ i = 1
81
+
82
+ context_closers = {
83
+ '{' => '}',
84
+ '[' => ']',
85
+ '"' => '"',
86
+ '/' => '/' # JavaScript regex
87
+ }
88
+
89
+ while i < html.length
90
+ break if stack.empty?
91
+
92
+ last_char = curr_char unless [' ', '\n'].include?(curr_char)
93
+ curr_char = html[i]
94
+ curr_context = stack.last
95
+
96
+ # If we've reached a context closer, we can remove an element off the stack
97
+ if curr_char == context_closers[curr_context]
98
+ stack.pop
99
+ i += 1
100
+ next
101
+ end
102
+ # Strings and regex expressions require special context handling because they can contain
103
+ # context openers *and* closers
104
+ if ['"', '/'].include?(curr_context)
105
+ # If there's a backslash in a string or regex expression, we skip a character
106
+ if curr_char == '\\'
107
+ i += 2
108
+ next
109
+ end
110
+ else
111
+ # Non-string contexts are when we need to look for context openers.
112
+ if context_closers.keys.include?(curr_char)
113
+ # Slash starts a regular expression depending on context
114
+ unless curr_char == '/' && ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
115
+ stack << curr_char
116
+ end
117
+ end
118
+ end
119
+
120
+ i += 1
121
+ end
122
+
123
+ full_obj = html[0..(i - 1)]
124
+ full_obj
125
+ end
126
+
127
+ def throttling_array_split(js_array)
128
+ results = []
129
+ curr_substring = js_array[1..-1]
130
+
131
+ comma_regex = /,/
132
+ func_regex = /function\([^)]*\)/
133
+
134
+ until curr_substring.empty?
135
+ if curr_substring.start_with?('function')
136
+ match = func_regex.match(curr_substring)
137
+ match_start = match.begin(0)
138
+ match_end = match.end(0)
139
+
140
+ function_text = find_object_from_startpoint(curr_substring, match_end)
141
+ full_function_def = curr_substring[0, match_end + function_text.length]
142
+ results << full_function_def
143
+ curr_substring = curr_substring[full_function_def.length + 1..-1]
144
+ else
145
+ match = comma_regex.match(curr_substring)
146
+
147
+ begin
148
+ match_start = match.begin(0)
149
+ match_end = match.end(0)
150
+ rescue NoMethodError
151
+ match_start = curr_substring.length - 1
152
+ match_end = match_start + 1
153
+ end
154
+
155
+ curr_el = curr_substring[0, match_start]
156
+ results << curr_el
157
+ curr_substring = curr_substring[match_end..-1]
158
+ end
159
+ end
160
+
161
+ results
162
+ end
163
+ end
164
+ end
@@ -0,0 +1,75 @@
1
module RubyTube
  # Thin HTTP helpers built on Faraday.
  module Request
    module_function

    # Number of bytes requested per range while streaming.
    DEFAULT_RANGE_SIZE = 9437184

    # @return the response body of a GET request
    def get(url, options = {})
      send(:get, url, options).body
    end

    # @return the response body of a POST request
    def post(url, options = {})
      send(:post, url, options).body
    end

    # @return the response headers of a HEAD request
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads a resource in ranges of DEFAULT_RANGE_SIZE bytes and yields
    # the body of each range to the block as one chunk.
    #
    # @param url [String] URL that already has a query string ("&range=" is appended)
    # @param timeout [Integer] request timeout passed on to each request
    # @param max_retries [Integer] extra attempts allowed after a timeout
    # @yieldparam chunk [String] body of one range
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        tries = 0

        begin
          raise MaxRetriesExceeded if tries >= 1 + max_retries

          response = send(:get, "#{url}&range=#{downloaded}-#{stop_pos}", timeout: timeout)
        rescue Faraday::TimeoutError
          # Only timeouts are retried; any other error propagates to the caller.
          tries += 1
          retry
        end

        if file_size == DEFAULT_RANGE_SIZE
          begin
            # NOTE(review): this is a full GET used only to read
            # Content-Length; a HEAD request may be sufficient - confirm.
            resp = send(:get, "#{url}&range=0-99999999999", timeout: timeout)
            file_size = resp.headers["Content-Length"].to_i
          rescue StandardError
            # Best effort: keep the current size when it cannot be determined.
          end
        end

        body = response.body.to_s
        # Without new data `downloaded` would never advance; stop here.
        break if body.empty?

        # Ranges are expressed in bytes, so progress is counted in bytes too.
        downloaded += body.bytesize
        yield body
      end
    end

    # Performs a single HTTP request.
    #
    # NOTE(review): this shadows Object#send on the Request module; the name
    # is kept because callers may rely on it.
    #
    # @param method [Symbol] HTTP verb understood by the Faraday connection
    # @param url [String]
    # @param options [Hash] optional :headers, :query, :data (sent as JSON)
    #   and :timeout
    # @return the Faraday response
    def send(method, url, options = {})
      headers = { 'Content-Type': 'text/html' }
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end
  end
end