rubytube 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.standard.yml +3 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +19 -0
- data/Rakefile +10 -0
- data/lib/rubytube/cipher.rb +369 -0
- data/lib/rubytube/client.rb +165 -0
- data/lib/rubytube/extractor.rb +177 -0
- data/lib/rubytube/innertube.rb +105 -0
- data/lib/rubytube/monostate.rb +5 -0
- data/lib/rubytube/parser.rb +164 -0
- data/lib/rubytube/request.rb +75 -0
- data/lib/rubytube/stream.rb +81 -0
- data/lib/rubytube/stream_query.rb +33 -0
- data/lib/rubytube/utils.rb +24 -0
- data/lib/rubytube/version.rb +5 -0
- data/lib/rubytube.rb +67 -0
- data/sig/rubytube.rbs +4 -0
- metadata +95 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
module RubyTube
  # A single YouTube video addressed by URL.
  #
  # Network-backed data (watch page HTML, player JavaScript, the InnerTube
  # player response) is fetched lazily on first use and memoized in instance
  # variables, so repeated reads do not trigger repeated requests.
  class Client
    attr_accessor :video_id, :watch_url, :embed_url, :stream_monostate

    # @param url [String] a URL (or string) from which Extractor.video_id can
    #   extract the video id
    def initialize(url)
      self.video_id = Extractor.video_id(url)

      self.watch_url = "https://youtube.com/watch?v=#{video_id}"
      self.embed_url = "https://www.youtube.com/embed/#{video_id}"

      self.stream_monostate = Monostate.new
    end

    # @return [String] body of the watch page (fetched once)
    def watch_html
      @watch_html ||= Request.get(watch_url)
    end

    # @return [String] body of the player JavaScript (fetched once)
    def js
      @js ||= Request.get(js_url)
    end

    # @return [String] URL of the player JavaScript, extracted from the watch page
    def js_url
      @js_url ||= Extractor.js_url(watch_html)
    end

    # Returns the 'streamingData' section of the player response. When the
    # default response has none, retries through bypass_age_gate, which
    # replaces the cached response.
    def streaming_data
      return vid_info['streamingData'] if vid_info && vid_info.key?('streamingData')

      bypass_age_gate
      vid_info['streamingData']
    end

    # Builds (once) the list of Stream objects for this video.
    #
    # @return [Array<Stream>]
    # @raise whatever check_availability raises for unavailable videos
    def fmt_streams
      check_availability
      return @fmt_streams if @fmt_streams

      stream_manifest = Extractor.apply_descrambler(streaming_data)

      begin
        Extractor.apply_signature(stream_manifest, vid_info, js)
      rescue ExtractError
        # Drop the memoized player JavaScript so the `js` call below fetches
        # it again. These must be the instance variables: assigning plain
        # `js = nil` would only create a local and pass nil to the retry.
        @js = nil
        @js_url = nil
        Extractor.apply_signature(stream_manifest, vid_info, js)
      end

      built = stream_manifest.map { |stream| Stream.new(stream, stream_monostate) }

      stream_monostate.title = title
      stream_monostate.duration = length

      # Cache only after everything succeeded, so a failure above cannot
      # leave an empty or partial list memoized for later calls.
      @fmt_streams = built
    end

    # Raises the matching error when the watch page reports that the video
    # cannot be played; returns normally otherwise.
    def check_availability
      status, messages = Extractor.playability_status(watch_html)

      # Array() tolerates a single message String as well as a list.
      Array(messages).each do |reason|
        case status
        when 'UNPLAYABLE'
          case reason
          when 'Join this channel to get access to members-only content like this video, and other exclusive perks.'
            raise MembersOnly.new(video_id)
          when 'This live stream recording is not available.'
            raise RecordingUnavailable.new(video_id)
          else
            raise VideoUnavailable.new(video_id)
          end
        when 'LOGIN_REQUIRED'
          if reason == 'This is a private video. Please sign in to verify that you may see it.'
            raise VideoPrivate.new(video_id)
          end
        when 'ERROR'
          raise VideoUnavailable.new(video_id) if reason == 'Video unavailable'
        when 'LIVE_STREAM'
          raise LiveStreamError.new(video_id)
        end
      end
    end

    # @return [StreamQuery] query wrapper around fmt_streams (built once)
    def streams
      return @streams if @streams

      check_availability
      @streams = StreamQuery.new(fmt_streams)
    end

    # @return [Hash] parsed InnerTube player response (fetched once)
    def vid_info
      @vid_info ||= InnerTube.new.player(video_id)
    end

    # Re-requests the player response with the ANDROID_EMBED client and makes
    # it the cached vid_info.
    #
    # @raise [VideoUnavailable] when that response is still UNPLAYABLE
    def bypass_age_gate
      resp = InnerTube.new(client: 'ANDROID_EMBED').player(video_id)

      raise VideoUnavailable.new(video_id) if resp['playabilityStatus']['status'] == 'UNPLAYABLE'

      @vid_info = resp
    end

    # @return [String] videoDetails.title from the player response
    def title
      @title ||= vid_info['videoDetails']['title']
    end

    # @return [Integer] videoDetails.lengthSeconds converted with to_i
    def length
      @length ||= vid_info['videoDetails']['lengthSeconds'].to_i
    end

    # @return [Integer] videoDetails.viewCount converted with to_i
    def views
      @views ||= vid_info['videoDetails']['viewCount'].to_i
    end

    # @return [String] videoDetails.author from the player response
    def author
      @author ||= vid_info['videoDetails']['author']
    end

    # @return videoDetails.keywords from the player response
    def keywords
      @keywords ||= vid_info['videoDetails']['keywords']
    end

    # @return [String] videoDetails.channelId from the player response
    def channel_id
      @channel_id ||= vid_info['videoDetails']['channelId']
    end
  end
end
|
@@ -0,0 +1,177 @@
|
|
1
|
+
module RubyTube
  # Class-level helpers that pull pieces of data out of watch-page HTML and
  # player-response hashes.
  class Extractor
    class << self
      # Reads the playability status embedded in the watch page.
      #
      # @param watch_html [String] watch page HTML
      # @return [Array] a two-element array `[status, messages]`; `messages`
      #   is always an Array, and `[nil, [nil]]` is returned when the page
      #   carries no usable status
      def playability_status(watch_html)
        player_response = JSON.parse(initial_player_response(watch_html))
        status_obj = player_response['playabilityStatus'] || {}

        # The message is wrapped in an Array like every other branch so that
        # callers can iterate over it uniformly.
        return ['LIVE_STREAM', ['Video is a live stream.']] if status_obj.key?('liveStreamability')

        if status_obj.key?('status')
          return [status_obj['status'], [status_obj['reason']]] if status_obj.key?('reason')
          return [status_obj['status'], status_obj['messages']] if status_obj.key?('messages')
        end

        [nil, [nil]]
      end

      # @param url [String]
      # @return the first capture of the 11-character id pattern, as returned
      #   by Utils.regex_search
      def video_id(url)
        Utils.regex_search(/(?:v=|\/)([0-9A-Za-z_-]{11}).*/, url, 1)
      end

      # Resolves the absolute URL of the player JavaScript.
      #
      # The embedded player config is tried first (`assets.js`); when it is
      # missing, unparseable, or has no such entry, the HTML is scanned for a
      # `/s/player/.../base.js` path instead.
      #
      # @param html [String] watch page HTML
      # @return [String]
      # @raise [RegexMatchError] when neither source yields a path
      def js_url(html)
        base_js =
          begin
            config = get_ytplayer_config(html)
            # The config arrives as raw object text; indexing a String with
            # ['assets'] would be a substring lookup, so parse it first.
            config = JSON.parse(config) if config.is_a?(String)
            config.dig('assets', 'js')
          rescue RegexMatchError, NoMethodError, TypeError, JSON::ParserError
            nil
          end

        base_js ||= get_ytplayer_js(html)

        "https://youtube.com#{base_js}"
      end

      # Splits a mime-type/codec descriptor such as
      # `video/mp4; codecs="avc1.64001F, mp4a.40.2"`.
      #
      # @return [Array] `[mime_type, [codec, ...]]`
      # @raise [RegexMatchError] when the descriptor does not match
      def mime_type_codec(mime_type_codec)
        pattern = %r{(\w+\/\w+)\;\scodecs=\"([a-zA-Z\-0-9.,\s]*)\"}
        results = mime_type_codec.match(pattern)

        raise RegexMatchError.new("mime_type_codec, pattern=#{pattern}") if results.nil?

        mime_type, codecs = results.captures
        [mime_type, codecs.split(",").map(&:strip)]
      end

      # Finds the player JavaScript path directly in the HTML.
      #
      # @return [String] the matched path
      # @raise [RegexMatchError] when no pattern matches
      def get_ytplayer_js(html)
        js_url_patterns = [
          %r{(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)},
        ]

        js_url_patterns.each do |pattern|
          function_match = html.match(pattern)
          return function_match[1] if function_match
        end

        raise RegexMatchError.new('get_ytplayer_js', 'js_url_patterns')
      end

      # Returns the first player-config object found in the HTML, as returned
      # by Parser.parse_for_object. Plain assignment patterns are tried before
      # the `yt.setConfig` pattern.
      #
      # @raise [RegexMatchError] when no pattern yields an object
      def get_ytplayer_config(html)
        config_patterns = [
          /ytplayer\.config\s*=\s*/,
          /ytInitialPlayerResponse\s*=\s*/
        ]
        setconfig_patterns = [
          /yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*/
        ]

        (config_patterns + setconfig_patterns).each do |pattern|
          begin
            return Parser.parse_for_object(html, pattern)
          rescue HTMLParseError
            next
          end
        end

        raise RegexMatchError.new('get_ytplayer_config', 'config_patterns, setconfig_patterns')
      end

      # Rewrites, in place, the `url` of every stream that still needs a
      # signature, using a Cipher built from the player JavaScript.
      #
      # @param stream_manifest [Array<Hash>] entries as produced by apply_descrambler
      # @param vid_info [Hash] player response, consulted for live-stream detection
      # @param js [String] player JavaScript handed to Cipher.new
      # @return [Array<Hash>] the same stream_manifest
      # @raise [LiveStreamError] when an entry has no URL and the video is a live stream
      def apply_signature(stream_manifest, vid_info, js)
        cipher = Cipher.new(js)

        stream_manifest.each do |stream|
          url = stream['url']

          # Hash#[] yields nil for a missing key (it does not raise), so the
          # absent-URL case has to be checked explicitly.
          if url.nil?
            raise LiveStreamError.new('UNKNOWN') if vid_info.dig('playabilityStatus', 'liveStreamability')

            # NOTE(review): an entry without a URL has nothing to sign, so it
            # is left untouched; confirm downstream consumers tolerate that.
            next
          end

          # For certain videos, YouTube provides pre-signed URLs, in which
          # case the signature descrambling can be skipped entirely.
          presigned = url.include?("signature") ||
                      (!stream.key?("s") && (url.include?("&sig=") || url.include?("&lsig=")))
          next if presigned

          signature = cipher.get_signature(stream['s'])

          parsed_url = URI.parse(url)

          query_params = CGI.parse(parsed_url.query.to_s)
          query_params.transform_values!(&:first)
          query_params['sig'] = signature

          # CGI.parse hashes answer missing keys with an empty Array, so the
          # presence of "n" must be tested with key? before it is split.
          if !query_params.key?('ratebypass') && query_params.key?('n')
            initial_n = query_params['n'].split('')
            query_params['n'] = cipher.calculate_n(initial_n)
          end

          stream['url'] =
            "#{parsed_url.scheme}://#{parsed_url.host}#{parsed_url.path}?#{URI.encode_www_form(query_params)}"
        end
      end

      # Flattens `formats` and `adaptiveFormats` into one list and, for
      # entries that only carry a `signatureCipher`, unpacks its `url` and `s`
      # fields onto the entry. Each entry also gains an `is_otf` flag. The
      # entry hashes are modified in place.
      #
      # @param stream_data [Hash] the 'streamingData' section
      # @return [Array<Hash>, nil] nil when stream_data itself has a 'url' key
      def apply_descrambler(stream_data)
        return if stream_data.key?('url')

        formats = []
        formats += stream_data['formats'] if stream_data.key?('formats')
        formats += stream_data['adaptiveFormats'] if stream_data.key?('adaptiveFormats')

        formats.each do |data|
          if !data.key?('url') && data.key?('signatureCipher')
            cipher_url = URI.decode_www_form(data['signatureCipher']).to_h
            data['url'] = cipher_url['url']
            data['s'] = cipher_url['s']
          end

          data['is_otf'] = data['type'] == 'FORMAT_STREAM_TYPE_OTF'
        end

        formats
      end

      private

      # Returns the `ytInitialPlayerResponse` object text found in the page,
      # as returned by Parser.parse_for_object.
      #
      # @raise [RegexMatchError] when no pattern yields an object
      def initial_player_response(watch_html)
        patterns = [
          "window\\[['\"]ytInitialPlayerResponse['\"]\\]\\s*=\\s*",
          "ytInitialPlayerResponse\\s*=\\s*"
        ]

        patterns.each do |pattern|
          begin
            return Parser.parse_for_object(watch_html, pattern)
          rescue HTMLParseError
            next
          end
        end

        raise RegexMatchError.new('initial_player_response', 'initial_player_response_pattern')
      end
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module RubyTube
  # Minimal client for the `youtubei/v1` (InnerTube) endpoints. Each instance
  # is bound to one of the client profiles in DEFAULT_CLIENTS, which supplies
  # the request context, extra headers and API key.
  class InnerTube
    DEFAULT_CLIENTS = {
      'WEB' => {
        context: {
          client: {
            clientName: 'WEB',
            clientVersion: '2.20200720.00.02'
          }
        },
        header: { 'User-Agent': 'Mozilla/5.0' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
      },
      'ANDROID_MUSIC' => {
        context: {
          client: {
            clientName: 'ANDROID_MUSIC',
            clientVersion: '5.16.51',
            androidSdkVersion: 30,
          },
        },
        header: { 'User-Agent': 'com.google.android.apps.youtube.music/'},
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
      },
      'ANDROID_EMBED' => {
        context: {
          client: {
            clientName: 'ANDROID_EMBEDDED_PLAYER',
            clientVersion: '17.31.35',
            clientScreen: 'EMBED',
            androidSdkVersion: 30,
          }
        },
        header: { 'User-Agent': 'com.google.android.youtube/' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      },
    }

    # Original (misspelled) name, kept so existing references keep working.
    DEFALUT_CLIENTS = DEFAULT_CLIENTS

    BASE_URL = 'https://www.youtube.com/youtubei/v1'

    attr_accessor :context, :header, :api_key, :access_token, :refresh_token, :use_oauth, :allow_cache, :expires

    # @param client [String] a key of DEFAULT_CLIENTS
    # @param use_oauth [Boolean] send a bearer token instead of the API key
    # @param allow_cache [Boolean] stored for cache_tokens (not implemented yet)
    # @raise [ArgumentError] when client is not a known profile
    def initialize(client: 'ANDROID_MUSIC', use_oauth: false, allow_cache: false)
      profile = DEFAULT_CLIENTS.fetch(client) do
        raise ArgumentError,
              "unknown InnerTube client #{client.inspect} (known: #{DEFAULT_CLIENTS.keys.join(', ')})"
      end

      self.context = profile[:context]
      self.header = profile[:header]
      self.api_key = profile[:api_key]
      self.use_oauth = use_oauth
      self.allow_cache = allow_cache
    end

    def cache_tokens
      return unless allow_cache

      # TODO:
    end

    def refresh_bearer_token(force: false)
      # TODO:
    end

    def fetch_bearer_token
      # TODO:
    end

    # POSTs `data` as JSON to `endpoint` and returns the parsed JSON reply.
    #
    # NOTE: this intentionally keeps the original method name, which shadows
    # Object#send on InnerTube instances.
    #
    # @param endpoint [String] full endpoint URL
    # @param query [Hash] extra query parameters, merged over the defaults
    # @param data [Hash] request body
    # @return [Hash, Array] result of JSON.parse on the response body
    def send(endpoint, query, data)
      headers = {
        'Content-Type': 'application/json',
      }

      if use_oauth
        # Refresh an existing token, or obtain a first one, before using it.
        access_token ? refresh_bearer_token : fetch_bearer_token
        headers['Authorization'] = "Bearer #{access_token}"
      end

      request_query = {
        key: api_key,
        contentCheckOk: true,
        racyCheckOk: true,
      }.merge(query)
      # With OAuth the API key must not be sent. It is removed from the merged
      # query, since the default key is only added at this point.
      request_query.delete(:key) if use_oauth

      options = {
        headers: headers.merge(header),
        query: request_query,
        data: data
      }

      JSON.parse(Request.post(endpoint, options))
    end

    # Fetches the player response for a video.
    #
    # @param video_id [String]
    # @return [Hash] parsed player response
    def player(video_id)
      endpoint = "#{BASE_URL}/player"
      query = { 'videoId' => video_id }

      send(endpoint, query, {context: context})
    end
  end
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
module RubyTube
  # Text-level helpers for cutting JavaScript object/array literals out of a
  # larger string without evaluating them.
  module Parser
    module_function

    # Opening character => character that closes that context.
    CONTEXT_CLOSERS = {
      '{' => '}',
      '[' => ']',
      '"' => '"',
      '/' => '/' # JavaScript regex
    }.freeze

    # A "/" directly after one of these (ignoring spaces and newlines) is
    # treated as the start of a regex literal rather than a division.
    REGEX_PRECEDERS = ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].freeze

    # Finds `preceding_regex` in `html` and returns the object or array
    # literal that starts right after the match.
    #
    # @param html [String]
    # @param preceding_regex [Regexp, String] pattern that ends just before the literal
    # @return [String] the literal's text, delimiters included
    # @raise [HTMLParseError] when the pattern does not match, or the match is
    #   not followed by "{" or "["
    def parse_for_object(html, preceding_regex)
      result = Regexp.new(preceding_regex).match(html)

      raise HTMLParseError, "No matches for regex #{preceding_regex}" if result.nil?

      parse_for_object_from_startpoint(html, result.end(0))
    end

    # Returns the text of the object or array literal beginning at
    # `start_point`, found by tracking nested braces, brackets, double-quoted
    # strings and regex literals.
    #
    # @param html [String]
    # @param start_point [Integer] index of the opening "{" or "["
    # @return [String] the literal's text; if the input ends before the
    #   literal is closed, everything from start_point onward
    # @raise [HTMLParseError] when the character at start_point is not "{" or "["
    def find_object_from_startpoint(html, start_point)
      html = html[start_point..-1].to_s
      unless ['{', '['].include?(html[0])
        raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
      end

      # The first character is an opener, so it seeds the stack and the scan
      # starts at the second character.
      last_char = '{'
      curr_char = nil
      stack = [html[0]]
      i = 1

      while i < html.length
        break if stack.empty?

        # Remember the previous significant character. Whitespace is skipped
        # (note the double quotes: "\n" is a real newline), and the nil seen
        # on the first pass must not overwrite the initial value.
        last_char = curr_char unless curr_char.nil? || [' ', "\n"].include?(curr_char)
        curr_char = html[i]
        curr_context = stack.last

        # Reaching the current context's closer pops it off the stack.
        if curr_char == CONTEXT_CLOSERS[curr_context]
          stack.pop
          i += 1
          next
        end

        if ['"', '/'].include?(curr_context)
          # Inside a string or regex only escapes matter: a backslash skips
          # the character that follows it.
          if curr_char == '\\'
            i += 2
            next
          end
        elsif CONTEXT_CLOSERS.key?(curr_char)
          # Outside strings, openers start a new context. A "/" only does so
          # when it follows a character after which a regex can begin.
          division = curr_char == '/' && !REGEX_PRECEDERS.include?(last_char)
          stack.push(curr_char) unless division
        end

        i += 1
      end

      html[0...i]
    end

    # Same contract as find_object_from_startpoint, to which it delegates so
    # that both entry points share one scanner.
    #
    # @param html [String]
    # @param start_point [Integer]
    # @return [String]
    # @raise [HTMLParseError]
    def parse_for_object_from_startpoint(html, start_point)
      find_object_from_startpoint(html, start_point)
    end

    # Splits the text of a JavaScript array literal into its top-level
    # elements. Elements beginning with `function` are kept whole (their body
    # is located with find_object_from_startpoint); everything else is split
    # at the next comma.
    #
    # @param js_array [String] array text starting with "[" and ending with "]"
    # @return [Array<String>] element texts
    def throttling_array_split(js_array)
      results = []
      curr_substring = js_array[1..-1]

      comma_regex = /,/
      func_regex = /function\([^)]*\)/

      until curr_substring.empty?
        if curr_substring.start_with?('function')
          match = func_regex.match(curr_substring)
          match_end = match.end(0)

          function_text = find_object_from_startpoint(curr_substring, match_end)
          full_function_def = curr_substring[0, match_end + function_text.length]
          results << full_function_def
          # +1 skips the separator after the function; slicing past the end
          # gives nil, which is treated as "nothing left".
          curr_substring = curr_substring[(full_function_def.length + 1)..-1] || ''
        else
          match = comma_regex.match(curr_substring)

          if match
            match_start = match.begin(0)
            match_end = match.end(0)
          else
            # Last element: stop just before the final character (the
            # closing bracket).
            match_start = curr_substring.length - 1
            match_end = match_start + 1
          end

          results << curr_substring[0, match_start]
          curr_substring = curr_substring[match_end..-1]
        end
      end

      results
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module RubyTube
  # Thin HTTP layer built on Faraday.
  module Request
    module_function

    # Size, in bytes, of each range requested by stream.
    DEFAULT_RANGE_SIZE = 9437184

    # @return the response body of a GET request
    def get(url, options = {})
      send(:get, url, options).body
    end

    # @return the response body of a POST request
    def post(url, options = {})
      send(:post, url, options).body
    end

    # @return the response headers of a HEAD request
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads `url` in consecutive ranges of DEFAULT_RANGE_SIZE bytes and
    # yields the body of each range to the block as one String chunk.
    #
    # The total size is unknown at first. After the first range has been
    # fetched, it is taken from the Content-Length header of an open-ended
    # range request.
    #
    # @param url [String] media URL; `&range=a-b` is appended to it
    # @param timeout [Integer] accepted for compatibility, currently unused
    # @param max_retries [Integer] extra attempts per range after a timeout
    # @yieldparam chunk [String] body of one range
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        response = nil
        tries = 0

        loop do
          raise MaxRetriesExceeded if tries >= 1 + max_retries

          begin
            response = send(:get, "#{url}&range=#{downloaded}-#{stop_pos}")
            break
          rescue Faraday::TimeoutError
            # Timeouts are retried; every other error propagates to the caller.
            tries += 1
          end
        end

        if file_size == DEFAULT_RANGE_SIZE
          begin
            resp = send(:get, "#{url}&range=0-99999999999")
            file_size = resp.headers["Content-Length"].to_i
          rescue StandardError
            # Best effort: keep the provisional size when the probe fails.
          end
        end

        chunk = response.body.to_s
        # An empty range means there is nothing more to read; stopping here
        # prevents the loop from spinning without making progress.
        break if chunk.empty?

        # Ranges are expressed in bytes, so progress is counted in bytes too
        # (character counts differ for multi-byte encodings).
        downloaded += chunk.bytesize
        yield chunk
      end
    end

    # Performs one HTTP request, following redirects.
    #
    # NOTE: keeps the original method name, which shadows Object#send on this
    # module.
    #
    # @param method [Symbol] :get, :post or :head
    # @param url [String]
    # @param options [Hash] optional :headers (merged over the default
    #   Content-Type), :query (request parameters) and :data (sent as JSON)
    # @return the Faraday response
    def send(method, url, options = {})
      headers = { 'Content-Type': 'text/html' }
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
      end
    end
  end
end
|