rubytube 0.3.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,164 +1,159 @@
1
- module RubyTube
2
- module Parser
3
- module_function
4
-
5
- def parse_for_object(html, preceding_regex)
6
- regex = Regexp.new(preceding_regex)
7
- result = regex.match(html)
8
-
9
- if result.nil?
10
- raise HTMLParseError, "No matches for regex #{preceding_regex}"
11
- end
12
- start_index = result.end(0)
13
-
14
- return parse_for_object_from_startpoint(html, start_index)
15
- end
16
-
17
- def find_object_from_startpoint(html, start_point)
18
- html = html[start_point..-1]
19
- unless ['{', '['].include?(html[0])
20
- raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
21
- end
22
-
23
- last_char = '{'
24
- curr_char = nil
25
- stack = [html[0]]
26
- i = 1
27
-
28
- context_closers = {
29
- '{' => '}',
30
- '[' => ']',
31
- '"' => '"',
32
- '/' => '/',
33
- }
34
-
35
- while i < html.length
36
- break if stack.empty?
37
-
38
- last_char = curr_char unless [' ', '\n'].include?(curr_char)
39
- curr_char = html[i]
40
- curr_context = stack.last
41
-
42
- if curr_char == context_closers[curr_context]
43
- stack.pop
44
- i += 1
45
- next
46
- end
47
-
48
- if ['"', '/'].include?(curr_context)
49
- if curr_char == '\\'
50
- i += 2
51
- next
52
- end
53
- else
54
- if context_closers.keys.include?(curr_char)
55
- unless curr_char == '/' && !['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
56
- stack.push(curr_char)
57
- end
58
- end
59
- end
60
-
61
- i += 1
62
- end
63
-
64
- full_obj = html[0...i]
65
- full_obj
66
- end
67
-
68
- def parse_for_object_from_startpoint(html, start_point)
69
- html = html[start_point..-1]
70
-
71
- unless ['{', '['].include?(html[0])
72
- raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
73
- end
74
-
75
- # First letter MUST be an open brace, so we put that in the stack,
76
- # and skip the first character.
77
- last_char = '{'
78
- curr_char = nil
79
- stack = [html[0]]
80
- i = 1
81
-
82
- context_closers = {
83
- '{' => '}',
84
- '[' => ']',
85
- '"' => '"',
86
- '/' => '/' # JavaScript regex
87
- }
88
-
89
- while i < html.length
90
- break if stack.empty?
91
-
92
- last_char = curr_char unless [' ', '\n'].include?(curr_char)
93
- curr_char = html[i]
94
- curr_context = stack.last
95
-
96
- # If we've reached a context closer, we can remove an element off the stack
97
- if curr_char == context_closers[curr_context]
98
- stack.pop
99
- i += 1
100
- next
101
- end
102
- # Strings and regex expressions require special context handling because they can contain
103
- # context openers *and* closers
104
- if ['"', '/'].include?(curr_context)
105
- # If there's a backslash in a string or regex expression, we skip a character
106
- if curr_char == '\\'
107
- i += 2
108
- next
109
- end
110
- else
111
- # Non-string contexts are when we need to look for context openers.
112
- if context_closers.keys.include?(curr_char)
113
- # Slash starts a regular expression depending on context
114
- unless curr_char == '/' && ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
115
- stack << curr_char
116
- end
117
- end
118
- end
119
-
120
- i += 1
121
- end
122
-
123
- full_obj = html[0..(i - 1)]
124
- full_obj
125
- end
126
-
127
- def throttling_array_split(js_array)
128
- results = []
129
- curr_substring = js_array[1..-1]
130
-
131
- comma_regex = /,/
132
- func_regex = /function\([^)]*\)/
133
-
134
- until curr_substring.empty?
135
- if curr_substring.start_with?('function')
136
- match = func_regex.match(curr_substring)
137
- match_start = match.begin(0)
138
- match_end = match.end(0)
139
-
140
- function_text = find_object_from_startpoint(curr_substring, match_end)
141
- full_function_def = curr_substring[0, match_end + function_text.length]
142
- results << full_function_def
143
- curr_substring = curr_substring[full_function_def.length + 1..-1]
144
- else
145
- match = comma_regex.match(curr_substring)
146
-
147
- begin
148
- match_start = match.begin(0)
149
- match_end = match.end(0)
150
- rescue NoMethodError
151
- match_start = curr_substring.length - 1
152
- match_end = match_start + 1
153
- end
154
-
155
- curr_el = curr_substring[0, match_start]
156
- results << curr_el
157
- curr_substring = curr_substring[match_end..-1]
158
- end
159
- end
160
-
161
- results
162
- end
163
- end
164
- end
1
+ module RubyTube
2
+ module Parser
3
+ module_function
4
+
5
+ def parse_for_object(html, preceding_regex)
6
+ regex = Regexp.new(preceding_regex)
7
+ result = regex.match(html)
8
+
9
+ if result.nil?
10
+ raise HTMLParseError, "No matches for regex #{preceding_regex}"
11
+ end
12
+ start_index = result.end(0)
13
+
14
+ parse_for_object_from_startpoint(html, start_index)
15
+ end
16
+
17
+ def find_object_from_startpoint(html, start_point)
18
+ html = html[start_point..-1]
19
+ unless ["{", "["].include?(html[0])
20
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
21
+ end
22
+
23
+ last_char = "{"
24
+ curr_char = nil
25
+ stack = [html[0]]
26
+ i = 1
27
+
28
+ context_closers = {
29
+ "{" => "}",
30
+ "[" => "]",
31
+ '"' => '"',
32
+ "/" => "/"
33
+ }
34
+
35
+ while i < html.length
36
+ break if stack.empty?
37
+
38
+ last_char = curr_char unless [" ", '\n'].include?(curr_char)
39
+ curr_char = html[i]
40
+ curr_context = stack.last
41
+
42
+ if curr_char == context_closers[curr_context]
43
+ stack.pop
44
+ i += 1
45
+ next
46
+ end
47
+
48
+ if ['"', "/"].include?(curr_context)
49
+ if curr_char == "\\"
50
+ i += 2
51
+ next
52
+ end
53
+ elsif context_closers.keys.include?(curr_char)
54
+ unless curr_char == "/" && !["(", ",", "=", ":", "[", "!", "&", "|", "?", "{", "}", ";"].include?(last_char)
55
+ stack.push(curr_char)
56
+ end
57
+ end
58
+
59
+ i += 1
60
+ end
61
+
62
+ html[0...i]
63
+ end
64
+
65
+ def parse_for_object_from_startpoint(html, start_point)
66
+ html = html[start_point..-1]
67
+
68
+ unless ["{", "["].include?(html[0])
69
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
70
+ end
71
+
72
+ # First letter MUST be an open brace, so we put that in the stack,
73
+ # and skip the first character.
74
+ last_char = "{"
75
+ curr_char = nil
76
+ stack = [html[0]]
77
+ i = 1
78
+
79
+ context_closers = {
80
+ "{" => "}",
81
+ "[" => "]",
82
+ '"' => '"',
83
+ :"'" => "'",
84
+ "/" => "/" # JavaScript regex
85
+ }
86
+
87
+ while i < html.length
88
+ break if stack.empty?
89
+
90
+ last_char = curr_char unless [" ", '\n'].include?(curr_char)
91
+ curr_char = html[i]
92
+ curr_context = stack.last
93
+
94
+ # If we've reached a context closer, we can remove an element off the stack
95
+ if curr_char == context_closers[curr_context]
96
+ stack.pop
97
+ i += 1
98
+ next
99
+ end
100
+ # Strings and regex expressions require special context handling because they can contain
101
+ # context openers *and* closers
102
+ if ['"', "/"].include?(curr_context)
103
+ # If there's a backslash in a string or regex expression, we skip a character
104
+ if curr_char == "\\"
105
+ i += 2
106
+ next
107
+ end
108
+ elsif context_closers.keys.include?(curr_char)
109
+ # Non-string contexts are when we need to look for context openers.
110
+ unless curr_char == "/" && ["(", ",", "=", ":", "[", "!", "&", "|", "?", "{", "}", ";"].include?(last_char)
111
+ stack << curr_char
112
+ end
113
+ # Slash starts a regular expression depending on context
114
+ end
115
+
116
+ i += 1
117
+ end
118
+
119
+ html[0..(i - 1)]
120
+ end
121
+
122
+ def throttling_array_split(js_array)
123
+ results = []
124
+ curr_substring = js_array[1..-1]
125
+
126
+ comma_regex = /,/
127
+ func_regex = /function\([^)]*\)/
128
+
129
+ until curr_substring.empty?
130
+ if curr_substring.start_with?("function")
131
+ match = func_regex.match(curr_substring)
132
+ match_start = match.begin(0)
133
+ match_end = match.end(0)
134
+
135
+ function_text = find_object_from_startpoint(curr_substring, match_end)
136
+ full_function_def = curr_substring[0, match_end + function_text.length]
137
+ results << full_function_def
138
+ curr_substring = curr_substring[full_function_def.length + 1..-1]
139
+ else
140
+ match = comma_regex.match(curr_substring)
141
+
142
+ begin
143
+ match_start = match.begin(0)
144
+ match_end = match.end(0)
145
+ rescue NoMethodError
146
+ match_start = curr_substring.length - 1
147
+ match_end = match_start + 1
148
+ end
149
+
150
+ curr_el = curr_substring[0, match_start]
151
+ results << curr_el
152
+ curr_substring = curr_substring[match_end..-1]
153
+ end
154
+ end
155
+
156
+ results
157
+ end
158
+ end
159
+ end
@@ -1,75 +1,73 @@
module RubyTube
  # HTTP helpers built on Faraday.
  module Request
    module_function

    # Number of bytes requested per range request in {stream}.
    DEFAULT_RANGE_SIZE = 9_437_184

    # Performs a GET request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return [String] response body
    def get(url, options = {})
      send(:get, url, options).body
    end

    # Performs a POST request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return [String] response body
    def post(url, options = {})
      send(:post, url, options).body
    end

    # Performs a HEAD request and returns the response headers.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return [Hash] response headers
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads +url+ in consecutive ranges of DEFAULT_RANGE_SIZE bytes,
    # yielding the body of each range response to the block.
    #
    # The range is appended to the URL as "&range=first-last". After the first
    # range has been fetched, the total size is read from the Content-Length
    # header of a second request; if that lookup raises, the error is ignored
    # and the size stays at DEFAULT_RANGE_SIZE.
    #
    # @param url [String] URL that already contains a query string
    # @param timeout [Integer] per-request timeout in seconds
    # @param max_retries [Integer] extra attempts per range after a timeout
    # @yieldparam chunk [String] body of one range response
    # @return [nil]
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        ranged_url = "#{url}&range=#{downloaded}-#{stop_pos}"

        tries = 0
        begin
          raise MaxRetriesExceeded if tries >= 1 + max_retries

          response = send(:get, ranged_url, {timeout: timeout})
        rescue Faraday::TimeoutError
          # Only timeouts are retried; every other error propagates.
          tries += 1
          retry
        end

        if file_size == DEFAULT_RANGE_SIZE
          begin
            # NOTE(review): this is a GET, so depending on the server the whole
            # body may be transferred just to read one header - confirm whether
            # a HEAD request would be sufficient here.
            resp = send(:get, "#{url}&range=0-99999999999", {timeout: timeout})
            file_size = resp.headers["Content-Length"].to_i
          rescue StandardError
            # Best effort: keep the current size when the lookup fails.
          end
        end

        body = response.body.to_s
        # Without new data `downloaded` would never advance; stop, don't spin.
        break if body.empty?

        # Offsets in the range are byte offsets, so count bytes, not characters.
        downloaded += body.bytesize
        yield body
      end
    end

    # Sends a request through a new Faraday connection that follows redirects.
    #
    # @param method [Symbol] HTTP verb understood by the connection (:get, :post, :head, ...)
    # @param url [String]
    # @param options [Hash]
    # @option options [Hash] :headers merged over the default Content-Type header
    # @option options [Hash] :query request query parameters
    # @option options [Object] :data request body, serialised with JSON.dump
    # @option options [Numeric] :timeout request timeout in seconds
    # @return [Faraday::Response]
    def send(method, url, options = {})
      headers = {"Content-Type": "text/html"}
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end
  end
end
module RubyTube
  # HTTP helpers built on Faraday.
  module Request
    module_function

    # Number of bytes requested per range request in {stream}.
    DEFAULT_RANGE_SIZE = 9_437_184

    # Performs a GET request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return [String] response body
    def get(url, options = {})
      send(:get, url, options).body
    end

    # Performs a POST request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return [String] response body
    def post(url, options = {})
      send(:post, url, options).body
    end

    # Performs a HEAD request and returns the response headers.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    # @return [Hash] response headers
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads +url+ in consecutive ranges of DEFAULT_RANGE_SIZE bytes,
    # yielding the body of each range response to the block.
    #
    # The range is appended to the URL as "&range=first-last". After the first
    # range has been fetched, the total size is read from the Content-Length
    # header of a second request; if that lookup raises, the error is ignored
    # and the size stays at DEFAULT_RANGE_SIZE.
    #
    # @param url [String] URL that already contains a query string
    # @param timeout [Integer] per-request timeout in seconds
    # @param max_retries [Integer] extra attempts per range after a timeout
    # @yieldparam chunk [String] body of one range response
    # @return [nil]
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        ranged_url = "#{url}&range=#{downloaded}-#{stop_pos}"

        tries = 0
        begin
          raise MaxRetriesExceeded if tries >= 1 + max_retries

          response = send(:get, ranged_url, {timeout: timeout})
        rescue Faraday::TimeoutError
          # Only timeouts are retried; every other error propagates.
          tries += 1
          retry
        end

        if file_size == DEFAULT_RANGE_SIZE
          begin
            # NOTE(review): this is a GET, so depending on the server the whole
            # body may be transferred just to read one header - confirm whether
            # a HEAD request would be sufficient here.
            resp = send(:get, "#{url}&range=0-99999999999", {timeout: timeout})
            file_size = resp.headers["Content-Length"].to_i
          rescue StandardError
            # Best effort: keep the current size when the lookup fails.
          end
        end

        body = response.body.to_s
        # Without new data `downloaded` would never advance; stop, don't spin.
        break if body.empty?

        # Offsets in the range are byte offsets, so count bytes, not characters.
        downloaded += body.bytesize
        yield body
      end
    end

    # Sends a request through a new Faraday connection that follows redirects.
    #
    # @param method [Symbol] HTTP verb understood by the connection (:get, :post, :head, ...)
    # @param url [String]
    # @param options [Hash]
    # @option options [Hash] :headers merged over the default Content-Type header
    # @option options [Hash] :query request query parameters
    # @option options [Object] :data request body, serialised with JSON.dump
    # @option options [Numeric] :timeout request timeout in seconds
    # @return [Faraday::Response]
    def send(method, url, options = {})
      headers = {"Content-Type": "text/html"}
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end
  end
end
@@ -1,97 +1,95 @@
module RubyTube
  # One downloadable media stream described by a hash of stream attributes.
  class Stream
    attr_accessor(
      :monostate,
      :url,
      :itag,
      :mime_type,
      :codecs,
      :type,
      :subtype,
      :file_size,
      :is_otf,
      :bitrate
    )

    # @param stream [Hash] stream attributes; reads the keys "url", "itag",
    #   "mimeType", "is_otf", "bitrate" and (optionally) "contentLength"
    # @param monostate [Object] shared object that responds to +title+
    def initialize(stream, monostate)
      self.monostate = monostate

      self.url = stream["url"]
      self.itag = stream["itag"].to_i

      self.mime_type, self.codecs = Extractor.mime_type_codec(stream["mimeType"])
      self.type, self.subtype = mime_type.split("/")

      self.is_otf = stream["is_otf"]
      self.bitrate = stream["bitrate"]

      self.file_size = stream.fetch("contentLength", 0).to_i
    end

    # Writes the stream to disk and returns the path of the file.
    # If the target file already exists nothing is downloaded.
    #
    # @param filename [String, nil] file name; defaults to "<title>.<subtype>"
    # @param output_dir [String, nil] passed to Utils.target_directory
    # @return [String] path of the downloaded (or pre-existing) file
    # @raise [HTTPError] for HTTP errors other than 404
    def download(filename: nil, output_dir: nil)
      file_path = get_file_path(filename, output_dir)

      return file_path if File.exist?(file_path)

      keep_file = false
      begin
        File.open(file_path, "wb") do |f|
          Request.stream(url) { |chunk| f.write(chunk) }
        end
        keep_file = true
      rescue HTTPError => e
        raise unless e.code == 404

        # A 404 is deliberately swallowed; whatever was written is kept.
        keep_file = true
      ensure
        # Remove partial output on failure; otherwise the File.exist? check
        # above would hand the truncated file back on the next call.
        File.delete(file_path) if !keep_file && File.exist?(file_path)
      end

      file_path
    end

    # @return [Boolean] true when the MIME type is "audio/*"
    def is_audio?
      type == "audio"
    end

    # @return [Boolean] true when the MIME type is "video/*"
    def is_video?
      type == "video"
    end

    # @return [Boolean] true when the stream lists an odd number of codecs
    def is_adaptive?
      codecs.size.odd?
    end

    # @return [Boolean] the opposite of {is_adaptive?}
    def is_progressive?
      !is_adaptive?
    end

    # @return [String] title taken from the shared monostate
    def title
      monostate.title
    end

    # @return [Object] resolution reported by the StreamFormat for this itag
    def resolution
      stream_format.resolution
    end

    private

    # Builds the output path from the (optionally prefixed) file name and the
    # directory returned by Utils.target_directory.
    def get_file_path(filename, output_dir, prefix = "")
      filename ||= default_filename
      filename = "#{prefix}#{filename}" if prefix

      File.join(Utils.target_directory(output_dir), filename)
    end

    # "<title>.<subtype>", with path separators in the title replaced by "_"
    # so that the title can never introduce extra directory levels.
    def default_filename
      separators = [File::SEPARATOR, File::ALT_SEPARATOR].compact
      safe_title = monostate.title.to_s.gsub(Regexp.union(separators), "_")
      "#{safe_title}.#{subtype}"
    end

    def stream_format
      @stream_format ||= StreamFormat.new(itag)
    end
  end
end
module RubyTube
  # One downloadable media stream described by a hash of stream attributes.
  class Stream
    attr_accessor(
      :monostate,
      :url,
      :itag,
      :mime_type,
      :codecs,
      :type,
      :subtype,
      :file_size,
      :is_otf,
      :bitrate
    )

    # @param stream [Hash] stream attributes; reads the keys "url", "itag",
    #   "mimeType", "is_otf", "bitrate" and (optionally) "contentLength"
    # @param monostate [Object] shared object that responds to +title+
    def initialize(stream, monostate)
      self.monostate = monostate

      self.url = stream["url"]
      self.itag = stream["itag"].to_i

      self.mime_type, self.codecs = Extractor.mime_type_codec(stream["mimeType"])
      self.type, self.subtype = mime_type.split("/")

      self.is_otf = stream["is_otf"]
      self.bitrate = stream["bitrate"]

      self.file_size = stream.fetch("contentLength", 0).to_i
    end

    # Writes the stream to disk and returns the path of the file.
    # If the target file already exists nothing is downloaded.
    #
    # @param filename [String, nil] file name; defaults to "<title>.<subtype>"
    # @param output_dir [String, nil] passed to Utils.target_directory
    # @return [String] path of the downloaded (or pre-existing) file
    # @raise [HTTPError] for HTTP errors other than 404
    def download(filename: nil, output_dir: nil)
      file_path = get_file_path(filename, output_dir)

      return file_path if File.exist?(file_path)

      keep_file = false
      begin
        File.open(file_path, "wb") do |f|
          Request.stream(url) { |chunk| f.write(chunk) }
        end
        keep_file = true
      rescue HTTPError => e
        raise unless e.code == 404

        # A 404 is deliberately swallowed; whatever was written is kept.
        keep_file = true
      ensure
        # Remove partial output on failure; otherwise the File.exist? check
        # above would hand the truncated file back on the next call.
        File.delete(file_path) if !keep_file && File.exist?(file_path)
      end

      file_path
    end

    # @return [Boolean] true when the MIME type is "audio/*"
    def is_audio?
      type == "audio"
    end

    # @return [Boolean] true when the MIME type is "video/*"
    def is_video?
      type == "video"
    end

    # @return [Boolean] true when the stream lists an odd number of codecs
    def is_adaptive?
      codecs.size.odd?
    end

    # @return [Boolean] the opposite of {is_adaptive?}
    def is_progressive?
      !is_adaptive?
    end

    # @return [String] title taken from the shared monostate
    def title
      monostate.title
    end

    # @return [Object] resolution reported by the StreamFormat for this itag
    def resolution
      stream_format.resolution
    end

    private

    # Builds the output path from the (optionally prefixed) file name and the
    # directory returned by Utils.target_directory.
    def get_file_path(filename, output_dir, prefix = "")
      filename ||= default_filename
      filename = "#{prefix}#{filename}" if prefix

      File.join(Utils.target_directory(output_dir), filename)
    end

    # "<title>.<subtype>", with path separators in the title replaced by "_"
    # so that the title can never introduce extra directory levels.
    def default_filename
      separators = [File::SEPARATOR, File::ALT_SEPARATOR].compact
      safe_title = monostate.title.to_s.gsub(Regexp.union(separators), "_")
      "#{safe_title}.#{subtype}"
    end

    def stream_format
      @stream_format ||= StreamFormat.new(itag)
    end
  end
end