rubytube 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
module RubyTube
  # Facade for a single video: derives the video id from a URL, lazily fetches
  # the watch page, player JavaScript and InnerTube player response, and builds
  # the list of Stream objects.
  class Client
    attr_accessor :video_id, :watch_url, :embed_url, :stream_monostate

    # @param url [String] any URL Extractor.video_id can pull a video id from
    def initialize(url)
      self.video_id = Extractor.video_id(url)

      self.watch_url = "https://youtube.com/watch?v=#{video_id}"
      self.embed_url = "https://www.youtube.com/embed/#{video_id}"

      self.stream_monostate = Monostate.new
    end

    # @return [String] body of the watch page (fetched once, then cached)
    def watch_html
      @watch_html ||= Request.get(watch_url)
    end

    # @return [String] body of the player JavaScript (fetched once, then cached)
    def js
      @js ||= Request.get(js_url)
    end

    # @return [String] URL of the player JavaScript extracted from the watch page
    def js_url
      @js_url ||= Extractor.js_url(watch_html)
    end

    # Returns the 'streamingData' section of the player response. When the
    # first response lacks it, the request is repeated through
    # #bypass_age_gate, which swaps in a different InnerTube client.
    def streaming_data
      return vid_info['streamingData'] if vid_info && vid_info.key?('streamingData')

      bypass_age_gate
      vid_info['streamingData']
    end

    # Builds (once) the Stream objects for every format in the player response.
    #
    # @return [Array<Stream>]
    # @raise whatever #check_availability or the Extractor raise
    def fmt_streams
      check_availability
      return @fmt_streams if @fmt_streams

      stream_manifest = Extractor.apply_descrambler(streaming_data)

      begin
        Extractor.apply_signature(stream_manifest, vid_info, js)
      rescue ExtractError
        # Drop the memoized player JavaScript so the retry downloads it again.
        # These must be the instance variables: assigning to bare `js` would
        # only create a local and hand nil to the second attempt.
        @js = nil
        @js_url = nil
        Extractor.apply_signature(stream_manifest, vid_info, js)
      end

      built = stream_manifest.map { |stream| Stream.new(stream, stream_monostate) }

      stream_monostate.title = title
      stream_monostate.duration = length

      # Cache only after everything succeeded, so a failed attempt does not
      # leave an empty list behind for later calls.
      @fmt_streams = built
    end

    # Inspects the playability status embedded in the watch page and raises
    # the matching error when the video cannot be played.
    #
    # @raise [MembersOnly, RecordingUnavailable, VideoUnavailable,
    #   VideoPrivate, LiveStreamError]
    def check_availability
      status, messages = Extractor.playability_status(watch_html)

      # Array() tolerates a bare String (or nil) as well as a list of reasons.
      Array(messages).each do |reason|
        case status
        when 'UNPLAYABLE'
          case reason
          when 'Join this channel to get access to members-only content like this video, and other exclusive perks.'
            raise MembersOnly.new(video_id)
          when 'This live stream recording is not available.'
            raise RecordingUnavailable.new(video_id)
          else
            raise VideoUnavailable.new(video_id)
          end
        when 'LOGIN_REQUIRED'
          if reason == 'This is a private video. Please sign in to verify that you may see it.'
            raise VideoPrivate.new(video_id)
          end
        when 'ERROR'
          raise VideoUnavailable.new(video_id) if reason == 'Video unavailable'
        when 'LIVE_STREAM'
          raise LiveStreamError.new(video_id)
        end
      end
    end

    # @return [StreamQuery] query object wrapping #fmt_streams (cached)
    def streams
      return @streams if @streams

      check_availability
      @streams = StreamQuery.new(fmt_streams)
    end

    # @return [Hash] parsed InnerTube player response (requested once, cached)
    def vid_info
      @vid_info ||= InnerTube.new.player(video_id)
    end

    # Re-requests the player response with the 'ANDROID_EMBED' client and
    # replaces the cached #vid_info with it.
    #
    # @raise [VideoUnavailable] when that response reports status 'UNPLAYABLE'
    def bypass_age_gate
      resp = InnerTube.new(client: 'ANDROID_EMBED').player(video_id)

      raise VideoUnavailable.new(video_id) if resp.dig('playabilityStatus', 'status') == 'UNPLAYABLE'

      @vid_info = resp
    end

    # @return [String] videoDetails.title
    def title
      @title ||= vid_info['videoDetails']['title']
    end

    # @return [Integer] videoDetails.lengthSeconds converted with #to_i
    def length
      @length ||= vid_info['videoDetails']['lengthSeconds'].to_i
    end

    # @return [Integer] videoDetails.viewCount converted with #to_i
    def views
      @views ||= vid_info['videoDetails']['viewCount'].to_i
    end

    # @return [String] videoDetails.author
    def author
      @author ||= vid_info['videoDetails']['author']
    end

    # @return [Object] videoDetails.keywords as delivered by the API
    def keywords
      @keywords ||= vid_info['videoDetails']['keywords']
    end

    # @return [String] videoDetails.channelId
    def channel_id
      @channel_id ||= vid_info['videoDetails']['channelId']
    end
  end
end
@@ -0,0 +1,177 @@
1
module RubyTube
  # Stateless helpers that pull ids, URLs and stream metadata out of the watch
  # page HTML and the player response.
  class Extractor
    class << self
      # Reads the playability status from the player response embedded in the
      # watch page.
      #
      # @param watch_html [String]
      # @return [Array(String, Array)] `[status, reasons]`; `[nil, [nil]]` when
      #   the response carries no usable status. The second element is always
      #   an Array so callers can iterate it.
      def playability_status(watch_html)
        player_response = JSON.parse(initial_player_response(watch_html))
        status_obj = player_response['playabilityStatus'] || {}

        return ['LIVE_STREAM', ['Video is a live stream.']] if status_obj.key?('liveStreamability')

        if status_obj.key?('status')
          return [status_obj['status'], [status_obj['reason']]] if status_obj.key?('reason')
          return [status_obj['status'], status_obj['messages']] if status_obj.key?('messages')
        end

        [nil, [nil]]
      end

      # @param url [String]
      # @return the first capture of the 11-character id pattern, as returned
      #   by Utils.regex_search
      def video_id(url)
        Utils.regex_search(/(?:v=|\/)([0-9A-Za-z_-]{11}).*/, url, 1)
      end

      # @param html [String] watch page HTML
      # @return [String] absolute URL of the player JavaScript
      def js_url(html)
        begin
          base_js = get_ytplayer_config(html)['assets']['js']
        rescue RegexMatchError, NoMethodError
          # No config object, or it has no assets/js entry: scan for base.js.
          base_js = get_ytplayer_js(html)
        end

        "https://youtube.com#{base_js}"
      end

      # Splits a value such as `video/mp4; codecs="avc1.64001F, mp4a.40.2"`.
      #
      # @param mime_type_codec [String]
      # @return [Array(String, Array<String>)] mime type and list of codecs
      # @raise [RegexMatchError] when the value does not have that shape
      def mime_type_codec(mime_type_codec)
        pattern = %r{(\w+\/\w+)\;\scodecs=\"([a-zA-Z\-0-9.,\s]*)\"}
        results = mime_type_codec.match(pattern)

        # Two arguments (caller, pattern), like every other RegexMatchError
        # raised in this class.
        raise RegexMatchError.new('mime_type_codec', pattern) if results.nil?

        mime_type, codecs = results.captures
        [mime_type, codecs.split(',').map(&:strip)]
      end

      # @param html [String]
      # @return [String] path of the player's base.js
      # @raise [RegexMatchError] when no known pattern matches
      def get_ytplayer_js(html)
        js_url_patterns = [
          %r{(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)}
        ]

        js_url_patterns.each do |pattern|
          function_match = html.match(pattern)
          return function_match[1] if function_match
        end

        raise RegexMatchError.new('get_ytplayer_js', 'js_url_patterns')
      end

      # Returns the first object Parser.parse_for_object can locate after one
      # of the known player-config assignments.
      #
      # @param html [String]
      # @raise [RegexMatchError] when none of the patterns yields an object
      def get_ytplayer_config(html)
        # Plain config assignments first, then the yt.setConfig form.
        patterns = [
          /ytplayer\.config\s*=\s*/,
          /ytInitialPlayerResponse\s*=\s*/,
          /yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*/
        ]

        patterns.each do |pattern|
          begin
            return Parser.parse_for_object(html, pattern)
          rescue HTMLParseError
            next
          end
        end

        raise RegexMatchError.new('get_ytplayer_config', 'config_patterns, setconfig_patterns')
      end

      # Rewrites, in place, the 'url' of every stream that still needs a
      # signature: adds `sig` and, unless `ratebypass` is present, replaces
      # the `n` parameter with the value computed by the Cipher.
      #
      # @param stream_manifest [Array<Hash>] output of .apply_descrambler
      # @param vid_info [Hash] player response
      # @param js [String] player JavaScript handed to Cipher.new
      # @return [Array<Hash>] the same manifest
      # @raise [LiveStreamError] when a stream has no URL and the response is
      #   flagged as a live stream
      def apply_signature(stream_manifest, vid_info, js)
        cipher = Cipher.new(js)

        stream_manifest.each do |stream|
          url = stream['url']

          # Hash#[] yields nil for a missing key (it does not raise), so the
          # missing-URL case has to be tested explicitly.
          if url.nil?
            raise LiveStreamError.new('UNKNOWN') if vid_info.fetch('playabilityStatus', {})['liveStreamability']

            next # nothing to sign for an entry without a URL
          end

          # For certain videos, YouTube will just provide them pre-signed, in
          # which case the signature descrambling can be skipped entirely.
          next if presigned?(stream, url)

          signature = cipher.get_signature(stream['s'])
          parsed_url = URI.parse(url)

          query_params = first_value_params(parsed_url.query)
          query_params['sig'] = signature
          if !query_params.key?('ratebypass') && query_params.key?('n')
            query_params['n'] = cipher.calculate_n(query_params['n'].split(''))
          end

          stream['url'] =
            "#{parsed_url.scheme}://#{parsed_url.host}#{parsed_url.path}?#{URI.encode_www_form(query_params)}"
        end
      end

      # Flattens 'formats' and 'adaptiveFormats' into one list, copying 'url'
      # and 's' out of 'signatureCipher' where no plain 'url' is present and
      # tagging every entry with 'is_otf'.
      #
      # @param stream_data [Hash] the 'streamingData' section
      # @return [Array<Hash>, nil] nil when stream_data itself carries 'url'
      def apply_descrambler(stream_data)
        return if stream_data.key?('url')

        formats = []
        formats += stream_data['formats'] if stream_data.key?('formats')
        formats += stream_data['adaptiveFormats'] if stream_data.key?('adaptiveFormats')

        formats.each do |data|
          if !data.key?('url') && data.key?('signatureCipher')
            cipher_url = URI.decode_www_form(data['signatureCipher']).to_h
            data['url'] = cipher_url['url']
            data['s'] = cipher_url['s']
          end
          data['is_otf'] = data['type'] == 'FORMAT_STREAM_TYPE_OTF'
        end

        formats
      end

      private

      # Locates the raw ytInitialPlayerResponse object text in the watch page.
      #
      # @raise [RegexMatchError] when neither assignment form is found
      def initial_player_response(watch_html)
        patterns = [
          "window\\[['\"]ytInitialPlayerResponse['\"]\\]\\s*=\\s*",
          "ytInitialPlayerResponse\\s*=\\s*"
        ]

        patterns.each do |pattern|
          begin
            return Parser.parse_for_object(watch_html, pattern)
          rescue HTMLParseError
            next
          end
        end

        raise RegexMatchError.new('initial_player_response', 'initial_player_response_pattern')
      end

      # True when the URL already carries a signature and needs no rewrite.
      def presigned?(stream, url)
        url.include?('signature') ||
          (!stream.key?('s') && (url.include?('&sig=') || url.include?('&lsig=')))
      end

      # Decodes a query string into a Hash, keeping the first value of any
      # repeated key. Uses URI.decode_www_form (as .apply_descrambler does)
      # rather than CGI.parse.
      def first_value_params(query)
        URI.decode_www_form(query.to_s).each_with_object({}) do |(key, value), params|
          params[key] = value unless params.key?(key)
        end
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
module RubyTube
  # Minimal client for the InnerTube `player` endpoint. Each named client
  # bundles the request context, an extra header and an API key.
  class InnerTube
    DEFAULT_CLIENTS = {
      'WEB' => {
        context: {
          client: {
            clientName: 'WEB',
            clientVersion: '2.20200720.00.02'
          }
        },
        header: { 'User-Agent': 'Mozilla/5.0' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      },
      'ANDROID_MUSIC' => {
        context: {
          client: {
            clientName: 'ANDROID_MUSIC',
            clientVersion: '5.16.51',
            androidSdkVersion: 30
          }
        },
        header: { 'User-Agent': 'com.google.android.apps.youtube.music/' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      },
      'ANDROID_EMBED' => {
        context: {
          client: {
            clientName: 'ANDROID_EMBEDDED_PLAYER',
            clientVersion: '17.31.35',
            clientScreen: 'EMBED',
            androidSdkVersion: 30
          }
        },
        header: { 'User-Agent': 'com.google.android.youtube/' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      }
    }.freeze

    # Original (misspelled) constant name, kept so existing references work.
    DEFALUT_CLIENTS = DEFAULT_CLIENTS

    BASE_URL = 'https://www.youtube.com/youtubei/v1'

    attr_accessor :context, :header, :api_key, :access_token, :refresh_token, :use_oauth, :allow_cache, :expires

    # @param client [String] a key of DEFAULT_CLIENTS
    # @param use_oauth [Boolean] authenticate with a bearer token when one is set
    # @param allow_cache [Boolean] stored for #cache_tokens
    # @raise [ArgumentError] when `client` is not a known client name
    def initialize(client: 'ANDROID_MUSIC', use_oauth: false, allow_cache: false)
      settings = DEFAULT_CLIENTS.fetch(client) do
        raise ArgumentError,
              "unknown InnerTube client #{client.inspect} (known: #{DEFAULT_CLIENTS.keys.join(', ')})"
      end

      self.context = settings[:context]
      self.header = settings[:header]
      self.api_key = settings[:api_key]
      self.use_oauth = use_oauth
      self.allow_cache = allow_cache
    end

    # Placeholder: token caching is not implemented yet.
    def cache_tokens
      return unless allow_cache

      # TODO:
    end

    # Placeholder: token refresh is not implemented yet.
    def refresh_bearer_token(force: false)
      # TODO:
    end

    # Placeholder: token retrieval is not implemented yet.
    def fetch_bearer_token
      # TODO:
    end

    # POSTs `data` as JSON to `endpoint` and parses the JSON reply.
    #
    # NOTE: this shadows Object#send for InnerTube instances; use `__send__`
    # for dynamic dispatch on them.
    #
    # @param endpoint [String] absolute endpoint URL
    # @param query [Hash] extra query parameters (not modified)
    # @param data [Hash] request body
    # @return [Hash, Array] parsed response
    def send(endpoint, query, data)
      # Symbol key on purpose: it must override the symbol-keyed default
      # Content-Type that Request.send starts from.
      headers = { 'Content-Type': 'application/json' }

      params = {
        key: api_key,
        contentCheckOk: true,
        racyCheckOk: true
      }.merge(query)

      if use_oauth
        access_token ? refresh_bearer_token : fetch_bearer_token

        # Switch from API-key to bearer auth only once a token really exists;
        # the token methods above are still stubs, so without one the request
        # keeps using the API key.
        if access_token
          headers['Authorization'] = "Bearer #{access_token}"
          params.delete(:key)
        end
      end

      options = {
        headers: headers.merge(header),
        query: params,
        data: data
      }

      JSON.parse(Request.post(endpoint, options))
    end

    # @param video_id [String]
    # @return [Hash] parsed player response for the video
    def player(video_id)
      send("#{BASE_URL}/player", { 'videoId' => video_id }, { context: context })
    end
  end
end
@@ -0,0 +1,5 @@
1
module RubyTube
  # Plain mutable holder for the two values the Client stores on it
  # (title and duration); a new instance starts with both unset.
  class Monostate
    # Video title.
    attr_accessor :title

    # Video duration.
    attr_accessor :duration
  end
end
@@ -0,0 +1,164 @@
1
+ module RubyTube
2
+ module Parser
3
+ module_function
4
+
5
+ def parse_for_object(html, preceding_regex)
6
+ regex = Regexp.new(preceding_regex)
7
+ result = regex.match(html)
8
+
9
+ if result.nil?
10
+ raise HTMLParseError, "No matches for regex #{preceding_regex}"
11
+ end
12
+ start_index = result.end(0)
13
+
14
+ return parse_for_object_from_startpoint(html, start_index)
15
+ end
16
+
17
+ def find_object_from_startpoint(html, start_point)
18
+ html = html[start_point..-1]
19
+ unless ['{', '['].include?(html[0])
20
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
21
+ end
22
+
23
+ last_char = '{'
24
+ curr_char = nil
25
+ stack = [html[0]]
26
+ i = 1
27
+
28
+ context_closers = {
29
+ '{' => '}',
30
+ '[' => ']',
31
+ '"' => '"',
32
+ '/' => '/',
33
+ }
34
+
35
+ while i < html.length
36
+ break if stack.empty?
37
+
38
+ last_char = curr_char unless [' ', '\n'].include?(curr_char)
39
+ curr_char = html[i]
40
+ curr_context = stack.last
41
+
42
+ if curr_char == context_closers[curr_context]
43
+ stack.pop
44
+ i += 1
45
+ next
46
+ end
47
+
48
+ if ['"', '/'].include?(curr_context)
49
+ if curr_char == '\\'
50
+ i += 2
51
+ next
52
+ end
53
+ else
54
+ if context_closers.keys.include?(curr_char)
55
+ unless curr_char == '/' && !['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
56
+ stack.push(curr_char)
57
+ end
58
+ end
59
+ end
60
+
61
+ i += 1
62
+ end
63
+
64
+ full_obj = html[0...i]
65
+ full_obj
66
+ end
67
+
68
+ def parse_for_object_from_startpoint(html, start_point)
69
+ html = html[start_point..-1]
70
+
71
+ unless ['{', '['].include?(html[0])
72
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
73
+ end
74
+
75
+ # First letter MUST be an open brace, so we put that in the stack,
76
+ # and skip the first character.
77
+ last_char = '{'
78
+ curr_char = nil
79
+ stack = [html[0]]
80
+ i = 1
81
+
82
+ context_closers = {
83
+ '{' => '}',
84
+ '[' => ']',
85
+ '"' => '"',
86
+ '/' => '/' # JavaScript regex
87
+ }
88
+
89
+ while i < html.length
90
+ break if stack.empty?
91
+
92
+ last_char = curr_char unless [' ', '\n'].include?(curr_char)
93
+ curr_char = html[i]
94
+ curr_context = stack.last
95
+
96
+ # If we've reached a context closer, we can remove an element off the stack
97
+ if curr_char == context_closers[curr_context]
98
+ stack.pop
99
+ i += 1
100
+ next
101
+ end
102
+ # Strings and regex expressions require special context handling because they can contain
103
+ # context openers *and* closers
104
+ if ['"', '/'].include?(curr_context)
105
+ # If there's a backslash in a string or regex expression, we skip a character
106
+ if curr_char == '\\'
107
+ i += 2
108
+ next
109
+ end
110
+ else
111
+ # Non-string contexts are when we need to look for context openers.
112
+ if context_closers.keys.include?(curr_char)
113
+ # Slash starts a regular expression depending on context
114
+ unless curr_char == '/' && ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
115
+ stack << curr_char
116
+ end
117
+ end
118
+ end
119
+
120
+ i += 1
121
+ end
122
+
123
+ full_obj = html[0..(i - 1)]
124
+ full_obj
125
+ end
126
+
127
+ def throttling_array_split(js_array)
128
+ results = []
129
+ curr_substring = js_array[1..-1]
130
+
131
+ comma_regex = /,/
132
+ func_regex = /function\([^)]*\)/
133
+
134
+ until curr_substring.empty?
135
+ if curr_substring.start_with?('function')
136
+ match = func_regex.match(curr_substring)
137
+ match_start = match.begin(0)
138
+ match_end = match.end(0)
139
+
140
+ function_text = find_object_from_startpoint(curr_substring, match_end)
141
+ full_function_def = curr_substring[0, match_end + function_text.length]
142
+ results << full_function_def
143
+ curr_substring = curr_substring[full_function_def.length + 1..-1]
144
+ else
145
+ match = comma_regex.match(curr_substring)
146
+
147
+ begin
148
+ match_start = match.begin(0)
149
+ match_end = match.end(0)
150
+ rescue NoMethodError
151
+ match_start = curr_substring.length - 1
152
+ match_end = match_start + 1
153
+ end
154
+
155
+ curr_el = curr_substring[0, match_start]
156
+ results << curr_el
157
+ curr_substring = curr_substring[match_end..-1]
158
+ end
159
+ end
160
+
161
+ results
162
+ end
163
+ end
164
+ end
@@ -0,0 +1,75 @@
1
module RubyTube
  # Thin HTTP layer on top of Faraday.
  module Request
    module_function

    # Number of bytes requested per range while streaming (9 MiB).
    DEFAULT_RANGE_SIZE = 9_437_184

    # @return [String] response body of a GET request
    def get(url, options = {})
      send(:get, url, options).body
    end

    # @return [String] response body of a POST request
    def post(url, options = {})
      send(:post, url, options).body
    end

    # @return response headers of a HEAD request
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads `url` in ranges of at most DEFAULT_RANGE_SIZE bytes, yielding
    # each received body to the block as one chunk.
    #
    # The range is appended as `&range=a-b`, so `url` is expected to already
    # contain a query string.
    #
    # @param url [String]
    # @param timeout [Integer] per-request timeout in seconds
    # @param max_retries [Integer] extra attempts after a timed-out request
    # @yieldparam chunk [String]
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = nil
      downloaded = 0

      loop do
        span = file_size ? [DEFAULT_RANGE_SIZE, file_size - downloaded].min : DEFAULT_RANGE_SIZE
        stop_pos = downloaded + span - 1

        response = get_with_retries("#{url}&range=#{downloaded}-#{stop_pos}", timeout, max_retries)
        file_size ||= remote_file_size(url, timeout)

        chunk = response.body.to_s
        break if chunk.empty? # nothing more to read; also prevents spinning forever

        downloaded += chunk.bytesize
        yield chunk

        # With a known size stop once it is reached; otherwise a short chunk
        # is taken as the end of the file.
        break if file_size ? downloaded >= file_size : chunk.bytesize < span
      end
    end

    # Performs one HTTP request through a fresh Faraday connection that
    # follows redirects.
    #
    # NOTE: this shadows Object#send on the module; use `__send__` for
    # dynamic dispatch.
    #
    # @param method [Symbol] :get, :post or :head
    # @param url [String]
    # @param options [Hash] optional :headers (merged over the default
    #   Content-Type), :query, :data (sent as JSON) and :timeout (seconds)
    # @return the Faraday response
    def send(method, url, options = {})
      headers = { 'Content-Type': 'text/html' }
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end

    # GETs `url`, retrying on Faraday::TimeoutError until 1 + max_retries
    # attempts have been made.
    def get_with_retries(url, timeout, max_retries)
      attempts = 0
      begin
        send(:get, url, { timeout: timeout })
      rescue Faraday::TimeoutError
        attempts += 1
        raise MaxRetriesExceeded if attempts >= 1 + max_retries

        retry
      end
    end

    # Best-effort lookup of the total size via a HEAD request, so the body is
    # not downloaded just to read Content-Length.
    #
    # @return [Integer, nil] nil when the size cannot be determined
    def remote_file_size(url, timeout)
      length = send(:head, "#{url}&range=0-99999999999", { timeout: timeout }).headers['Content-Length'].to_i
      length.positive? ? length : nil
    rescue StandardError
      nil
    end

    private_class_method :get_with_retries, :remote_file_size
  end
end