rubytube 0.3.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +3 -3
- data/.standard.yml +3 -3
- data/CHANGELOG.md +5 -5
- data/CODE_OF_CONDUCT.md +84 -84
- data/Gemfile +12 -12
- data/LICENSE.txt +21 -21
- data/README.md +28 -19
- data/Rakefile +10 -10
- data/lib/rubytube/cipher.rb +370 -371
- data/lib/rubytube/client.rb +173 -173
- data/lib/rubytube/error.rb +49 -0
- data/lib/rubytube/extractor.rb +171 -177
- data/lib/rubytube/innertube.rb +105 -105
- data/lib/rubytube/monostate.rb +5 -5
- data/lib/rubytube/parser.rb +159 -164
- data/lib/rubytube/request.rb +73 -75
- data/lib/rubytube/stream.rb +95 -97
- data/lib/rubytube/stream_format.rb +152 -152
- data/lib/rubytube/stream_query.rb +61 -36
- data/lib/rubytube/utils.rb +24 -24
- data/lib/rubytube/version.rb +5 -5
- data/lib/rubytube.rb +27 -68
- data/sig/rubytube.rbs +4 -4
- metadata +7 -6
data/lib/rubytube/parser.rb
CHANGED
@@ -1,164 +1,159 @@
|
|
1
|
-
module RubyTube
|
2
|
-
module Parser
|
3
|
-
module_function
|
4
|
-
|
5
|
-
def parse_for_object(html, preceding_regex)
|
6
|
-
regex = Regexp.new(preceding_regex)
|
7
|
-
result = regex.match(html)
|
8
|
-
|
9
|
-
if result.nil?
|
10
|
-
raise HTMLParseError, "No matches for regex #{preceding_regex}"
|
11
|
-
end
|
12
|
-
start_index = result.end(0)
|
13
|
-
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
def find_object_from_startpoint(html, start_point)
|
18
|
-
html = html[start_point..-1]
|
19
|
-
unless [
|
20
|
-
raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
|
21
|
-
end
|
22
|
-
|
23
|
-
last_char =
|
24
|
-
curr_char = nil
|
25
|
-
stack = [html[0]]
|
26
|
-
i = 1
|
27
|
-
|
28
|
-
context_closers = {
|
29
|
-
|
30
|
-
|
31
|
-
'"' => '"',
|
32
|
-
|
33
|
-
}
|
34
|
-
|
35
|
-
while i < html.length
|
36
|
-
break if stack.empty?
|
37
|
-
|
38
|
-
last_char = curr_char unless [
|
39
|
-
curr_char = html[i]
|
40
|
-
curr_context = stack.last
|
41
|
-
|
42
|
-
if curr_char == context_closers[curr_context]
|
43
|
-
stack.pop
|
44
|
-
i += 1
|
45
|
-
next
|
46
|
-
end
|
47
|
-
|
48
|
-
if ['"',
|
49
|
-
if curr_char ==
|
50
|
-
i += 2
|
51
|
-
next
|
52
|
-
end
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
'
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
curr_substring
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
results
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
1
|
+
module RubyTube
  # Helpers for cutting JavaScript/JSON object literals and array elements
  # out of raw page or script source. All results are returned as Strings;
  # nothing in this module evaluates or deserializes the extracted text.
  module Parser
    module_function

    # Characters allowed at the start point of an object scan.
    OBJECT_OPENERS = ["{", "["].freeze

    # Opener => closer table used when scanning JavaScript function bodies.
    # "/" opens and closes a JavaScript regex literal.
    BASE_CLOSERS = {
      "{" => "}",
      "[" => "]",
      '"' => '"',
      "/" => "/"
    }.freeze

    # Same table plus single-quoted strings. The key must be a String:
    # scanned characters are Strings, so a Symbol key would never match.
    QUOTED_CLOSERS = BASE_CLOSERS.merge("'" => "'").freeze

    # Non-blank characters after which a "/" starts a regex literal rather
    # than being a division operator.
    REGEX_PRECEDERS = ["(", ",", "=", ":", "[", "!", "&", "|", "?", "{", "}", ";"].freeze

    # Characters ignored when remembering the previous significant character.
    # Double quotes are required here: '\n' would be a backslash followed by n.
    BLANKS = [" ", "\n"].freeze

    # Matches the "function(args)" header of an inline function definition.
    FUNCTION_HEADER = /function\([^)]*\)/.freeze

    # Finds the first match of +preceding_regex+ in +html+ and returns the
    # object literal that starts immediately after that match.
    #
    # @param html [String] source text to search
    # @param preceding_regex [String, Regexp] pattern that ends right before the object
    # @return [String] text of the object literal
    # @raise [HTMLParseError] if the pattern does not match, or the match is
    #   not followed by "{" or "["
    def parse_for_object(html, preceding_regex)
      result = Regexp.new(preceding_regex).match(html)
      raise HTMLParseError, "No matches for regex #{preceding_regex}" if result.nil?

      parse_for_object_from_startpoint(html, result.end(0))
    end

    # Returns the balanced object/array literal beginning at +start_point+.
    # Only double-quoted strings and regex literals are treated as opaque.
    #
    # @param html [String] source text
    # @param start_point [Integer] index of the opening "{" or "["
    # @return [String] text from the opener through its matching closer
    # @raise [HTMLParseError] if +start_point+ is not on "{" or "["
    def find_object_from_startpoint(html, start_point)
      scan_balanced(html, start_point, BASE_CLOSERS)
    end

    # Like #find_object_from_startpoint, but single-quoted strings are also
    # treated as opaque, so braces inside them do not affect nesting.
    #
    # @param html [String] source text
    # @param start_point [Integer] index of the opening "{" or "["
    # @return [String] text from the opener through its matching closer
    # @raise [HTMLParseError] if +start_point+ is not on "{" or "["
    def parse_for_object_from_startpoint(html, start_point)
      scan_balanced(html, start_point, QUOTED_CLOSERS)
    end

    # Splits the text of a JavaScript array literal into its top-level
    # elements. Inline function definitions are kept whole even though their
    # bodies may contain commas.
    #
    # @param js_array [String] array source, including the surrounding brackets
    # @return [Array<String>] element source texts, in order
    # @raise [HTMLParseError] if an element starts with "function" but has no
    #   recognizable header, or its body does not start with "{"
    def throttling_array_split(js_array)
      results = []
      # Drop the opening bracket; the closing one is trimmed with the last element.
      remaining = js_array[1..-1].to_s

      until remaining.empty?
        if remaining.start_with?("function")
          header = FUNCTION_HEADER.match(remaining)
          if header.nil?
            raise HTMLParseError, "Malformed function definition:\n#{remaining[0..19]}"
          end

          body = find_object_from_startpoint(remaining, header.end(0))
          definition = remaining[0, header.end(0) + body.length]
          results << definition
          # Skip the definition plus the separator (or closing bracket) after it.
          remaining = remaining[(definition.length + 1)..-1].to_s
        else
          comma_at = remaining.index(",")
          if comma_at
            results << remaining[0, comma_at]
            remaining = remaining[(comma_at + 1)..-1]
          else
            # Last element: everything except the final closing bracket.
            results << remaining[0, remaining.length - 1]
            remaining = ""
          end
        end
      end

      results
    end

    # Walks +html+ from +start_point+, tracking nested contexts on a stack,
    # and returns the text up to the closer that empties the stack (or the
    # rest of the text if it never empties).
    #
    # @param html [String] source text
    # @param start_point [Integer] index of the opening "{" or "["
    # @param closers [Hash{String=>String}] opener => closer table
    # @return [String]
    # @raise [HTMLParseError] if the text at +start_point+ is not "{" or "["
    def scan_balanced(html, start_point, closers)
      # to_s: a start point past the end yields nil, which should be reported
      # as an invalid start point rather than crash.
      html = html[start_point..-1].to_s
      unless OBJECT_OPENERS.include?(html[0])
        raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
      end

      # The first character is the opener: it seeds the stack and is skipped.
      stack = [html[0]]
      last_char = "{"
      curr_char = nil
      i = 1

      while i < html.length && !stack.empty?
        last_char = curr_char unless BLANKS.include?(curr_char)
        curr_char = html[i]
        context = stack.last

        if curr_char == closers[context]
          # Reached the closer of the innermost context.
          stack.pop
        elsif closers[context] == context
          # Strings and regexes (opener == closer) can contain openers and
          # closers; only a backslash matters, and it skips the next character.
          i += 1 if curr_char == "\\"
        elsif closers.key?(curr_char)
          # A slash opens a regex only after an operator-like character;
          # anywhere else it is a division and must not open a context.
          division = curr_char == "/" && !REGEX_PRECEDERS.include?(last_char)
          stack << curr_char unless division
        end

        i += 1
      end

      html[0...i]
    end
    private_class_method :scan_balanced
  end
end
|
data/lib/rubytube/request.rb
CHANGED
@@ -1,75 +1,73 @@
|
|
1
|
-
module RubyTube
|
2
|
-
module Request
|
3
|
-
module_function
|
4
|
-
|
5
|
-
DEFAULT_RANGE_SIZE = 9437184
|
6
|
-
|
7
|
-
def get(url, options = {})
|
8
|
-
send(:get, url, options).body
|
9
|
-
end
|
10
|
-
|
11
|
-
def post(url, options = {})
|
12
|
-
send(:post, url, options).body
|
13
|
-
end
|
14
|
-
|
15
|
-
def head(url, options = {})
|
16
|
-
send(:head, url, options).headers
|
17
|
-
end
|
18
|
-
|
19
|
-
def stream(url, timeout: 60, max_retries: 0)
|
20
|
-
file_size = DEFAULT_RANGE_SIZE
|
21
|
-
downloaded = 0
|
22
|
-
|
23
|
-
while downloaded < file_size
|
24
|
-
stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
|
25
|
-
range_header = "bytes=#{downloaded}-#{stop_pos}"
|
26
|
-
tries = 0
|
27
|
-
|
28
|
-
while true
|
29
|
-
begin
|
30
|
-
if tries >= 1 + max_retries
|
31
|
-
raise MaxRetriesExceeded
|
32
|
-
end
|
33
|
-
response = send(:get, "#{url}&range=#{downloaded}-#{stop_pos}")
|
34
|
-
break
|
35
|
-
rescue Faraday::TimeoutError
|
36
|
-
rescue Faraday::ClientError => e
|
37
|
-
raise e
|
38
|
-
end
|
39
|
-
tries += 1
|
40
|
-
end
|
41
|
-
|
42
|
-
if file_size == DEFAULT_RANGE_SIZE
|
43
|
-
begin
|
44
|
-
resp = send(:get, "#{url}&range=0-99999999999")
|
45
|
-
content_range = resp.headers["Content-Length"]
|
46
|
-
file_size = content_range.to_i
|
47
|
-
rescue KeyError, IndexError, StandardError => e
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
response.body.each_char do |chunk|
|
52
|
-
downloaded += chunk.length
|
53
|
-
yield chunk
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def send(method, url, options = {})
|
59
|
-
headers = {
|
60
|
-
options[:headers] && headers.merge!(options[:headers])
|
61
|
-
|
62
|
-
connection = Faraday.new(url: url) do |faraday|
|
63
|
-
faraday.response :follow_redirects
|
64
|
-
faraday.adapter Faraday.default_adapter
|
65
|
-
end
|
66
|
-
|
67
|
-
req.headers = headers
|
68
|
-
options[:query] && req.params = options[:query]
|
69
|
-
options[:data] && req.body = JSON.dump(options[:data])
|
70
|
-
end
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
end
|
75
|
-
end
|
1
|
+
module RubyTube
  # Thin HTTP layer built on Faraday.
  module Request
    module_function

    # Number of bytes requested per range while streaming. Also used as the
    # provisional file size until the real size has been probed.
    DEFAULT_RANGE_SIZE = 9437184

    # Performs a GET request.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return the response body
    def get(url, options = {})
      send(:get, url, options).body
    end

    # Performs a POST request.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return the response body
    def post(url, options = {})
      send(:post, url, options).body
    end

    # Performs a HEAD request.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return the response headers
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads +url+ in consecutive byte ranges, yielding each range's body.
    #
    # The range is appended to the URL as "&range=first-last", so +url+ is
    # expected to already carry a query string.
    #
    # @param url [String]
    # @param timeout [Integer] per-request timeout in seconds
    # @param max_retries [Integer] extra attempts allowed after a timeout
    # @yieldparam chunk [String] body of one range request
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        response = fetch_range(url, downloaded, stop_pos, timeout, max_retries)

        # The real size is unknown until probed once.
        file_size = probe_file_size(url, timeout, file_size) if file_size == DEFAULT_RANGE_SIZE

        chunk = response.body.to_s
        # An empty body can never advance the offset; stop instead of
        # requesting the same range forever.
        break if chunk.empty?

        # Range offsets are bytes, so count bytes, not characters.
        downloaded += chunk.bytesize
        yield chunk
      end
    end

    # Builds a Faraday connection for +url+ and issues the request.
    #
    # NOTE(review): this shadows Object#send on the module's singleton; the
    # name is kept because callers may depend on it.
    #
    # @param method [Symbol] Faraday verb (:get, :post, :head)
    # @param url [String]
    # @param options [Hash]
    # @option options [Hash] :headers merged over the default headers
    # @option options [Hash] :query request query parameters
    # @option options [Object] :data request body, serialized with JSON.dump
    # @option options [Integer] :timeout request timeout in seconds
    # @return the Faraday response
    def send(method, url, options = {})
      headers = {"Content-Type": "text/html"}
      headers = headers.merge(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end

    # Requests one byte range, retrying on timeouts up to +max_retries+
    # additional times. Other Faraday errors propagate unchanged.
    def fetch_range(url, first_byte, last_byte, timeout, max_retries)
      attempts = 0
      begin
        raise MaxRetriesExceeded if attempts > max_retries

        send(:get, "#{url}&range=#{first_byte}-#{last_byte}", timeout: timeout)
      rescue Faraday::TimeoutError
        attempts += 1
        retry
      end
    end
    private_class_method :fetch_range

    # Reads the Content-Length reported for an open-ended range request.
    # Best effort: any failure keeps +fallback+ as the size.
    def probe_file_size(url, timeout, fallback)
      resp = send(:get, "#{url}&range=0-99999999999", timeout: timeout)
      resp.headers["Content-Length"].to_i
    rescue StandardError
      fallback
    end
    private_class_method :probe_file_size
  end
end
|
data/lib/rubytube/stream.rb
CHANGED
@@ -1,97 +1,95 @@
|
|
1
|
-
module RubyTube
|
2
|
-
class Stream
|
3
|
-
attr_accessor(
|
4
|
-
:monostate,
|
5
|
-
:url,
|
6
|
-
:itag,
|
7
|
-
:mime_type,
|
8
|
-
:codecs,
|
9
|
-
:type,
|
10
|
-
:subtype,
|
11
|
-
:file_size,
|
12
|
-
:is_otf,
|
13
|
-
:bitrate
|
14
|
-
)
|
15
|
-
|
16
|
-
def initialize(stream, monostate)
|
17
|
-
self.monostate = monostate
|
18
|
-
|
19
|
-
self.url = stream[
|
20
|
-
self.itag = stream[
|
21
|
-
|
22
|
-
self.mime_type, self.codecs = Extractor.mime_type_codec(stream[
|
23
|
-
self.type, self.subtype = mime_type.split(
|
24
|
-
|
25
|
-
self.is_otf = stream[
|
26
|
-
self.bitrate = stream[
|
27
|
-
|
28
|
-
self.file_size = stream.fetch(
|
29
|
-
end
|
30
|
-
|
31
|
-
def download(filename: nil, output_dir: nil)
|
32
|
-
file_path = get_file_path(filename, output_dir)
|
33
|
-
|
34
|
-
return file_path if File.exist?(file_path)
|
35
|
-
|
36
|
-
bytes_remaining = file_size
|
37
|
-
|
38
|
-
File.open(file_path,
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
end
|
97
|
-
end
|
1
|
+
module RubyTube
  # One downloadable media stream, built from a stream description hash.
  class Stream
    attr_accessor(
      :monostate,
      :url,
      :itag,
      :mime_type,
      :codecs,
      :type,
      :subtype,
      :file_size,
      :is_otf,
      :bitrate
    )

    # @param stream [Hash] stream description with string keys; reads "url",
    #   "itag", "mimeType", "is_otf", "bitrate" and, optionally, "contentLength"
    # @param monostate [#title] shared state object that supplies the title
    def initialize(stream, monostate)
      self.monostate = monostate

      self.url = stream["url"]
      self.itag = stream["itag"].to_i

      # NOTE(review): presumably returns [mime_type, codecs]; confirm against Extractor.
      self.mime_type, self.codecs = Extractor.mime_type_codec(stream["mimeType"])
      self.type, self.subtype = mime_type.split("/")

      self.is_otf = stream["is_otf"]
      self.bitrate = stream["bitrate"]

      # A missing contentLength is recorded as 0.
      self.file_size = stream.fetch("contentLength", 0).to_i
    end

    # Writes the stream to disk unless the target file already exists.
    #
    # @param filename [String, nil] file name; defaults to "<title>.<subtype>"
    # @param output_dir [String, nil] passed to Utils.target_directory
    # @return [String] path of the target file
    # @raise [HTTPError] for HTTP errors other than 404
    def download(filename: nil, output_dir: nil)
      file_path = get_file_path(filename, output_dir)

      return file_path if File.exist?(file_path)

      File.open(file_path, "wb") do |f|
        Request.stream(url) { |chunk| f.write(chunk) }
      rescue HTTPError => e
        # A 404 is deliberately swallowed; anything else is re-raised.
        raise unless e.code == 404
      end

      file_path
    end

    # @return [Boolean] true when the MIME type is "audio/..."
    def is_audio?
      type == "audio"
    end

    # @return [Boolean] true when the MIME type is "video/..."
    def is_video?
      type == "video"
    end

    # @return [Boolean] true when the stream lists an odd number of codecs
    def is_adaptive?
      codecs.size.odd?
    end

    # @return [Boolean] the opposite of #is_adaptive?
    def is_progressive?
      !is_adaptive?
    end

    # @return the title supplied by the monostate
    def title
      monostate.title
    end

    # @return the resolution reported by the StreamFormat for this itag
    def resolution
      stream_format.resolution
    end

    private

    # Joins the target directory with the (optionally prefixed) file name.
    def get_file_path(filename, output_dir, prefix = "")
      filename ||= default_filename
      # Interpolating a nil or empty prefix leaves the name unchanged.
      filename = "#{prefix}#{filename}"

      File.join(Utils.target_directory(output_dir), filename)
    end

    # "<title>.<subtype>", with path separators in the title replaced so the
    # name cannot point into another (possibly non-existent) directory.
    def default_filename
      separators = [File::SEPARATOR, File::ALT_SEPARATOR].compact
      safe_title = monostate.title.to_s.gsub(Regexp.union(*separators), "_")
      "#{safe_title}.#{subtype}"
    end

    # Memoized StreamFormat for this stream's itag.
    def stream_format
      @stream_format ||= StreamFormat.new(itag)
    end
  end
end
|