rubytube 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,164 +1,164 @@
1
- module RubyTube
2
- module Parser
3
- module_function
4
-
5
- def parse_for_object(html, preceding_regex)
6
- regex = Regexp.new(preceding_regex)
7
- result = regex.match(html)
8
-
9
- if result.nil?
10
- raise HTMLParseError, "No matches for regex #{preceding_regex}"
11
- end
12
- start_index = result.end(0)
13
-
14
- return parse_for_object_from_startpoint(html, start_index)
15
- end
16
-
17
- def find_object_from_startpoint(html, start_point)
18
- html = html[start_point..-1]
19
- unless ['{', '['].include?(html[0])
20
- raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
21
- end
22
-
23
- last_char = '{'
24
- curr_char = nil
25
- stack = [html[0]]
26
- i = 1
27
-
28
- context_closers = {
29
- '{' => '}',
30
- '[' => ']',
31
- '"' => '"',
32
- '/' => '/',
33
- }
34
-
35
- while i < html.length
36
- break if stack.empty?
37
-
38
- last_char = curr_char unless [' ', '\n'].include?(curr_char)
39
- curr_char = html[i]
40
- curr_context = stack.last
41
-
42
- if curr_char == context_closers[curr_context]
43
- stack.pop
44
- i += 1
45
- next
46
- end
47
-
48
- if ['"', '/'].include?(curr_context)
49
- if curr_char == '\\'
50
- i += 2
51
- next
52
- end
53
- else
54
- if context_closers.keys.include?(curr_char)
55
- unless curr_char == '/' && !['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
56
- stack.push(curr_char)
57
- end
58
- end
59
- end
60
-
61
- i += 1
62
- end
63
-
64
- full_obj = html[0...i]
65
- full_obj
66
- end
67
-
68
- def parse_for_object_from_startpoint(html, start_point)
69
- html = html[start_point..-1]
70
-
71
- unless ['{', '['].include?(html[0])
72
- raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
73
- end
74
-
75
- # First letter MUST be an open brace, so we put that in the stack,
76
- # and skip the first character.
77
- last_char = '{'
78
- curr_char = nil
79
- stack = [html[0]]
80
- i = 1
81
-
82
- context_closers = {
83
- '{' => '}',
84
- '[' => ']',
85
- '"' => '"',
86
- '/' => '/' # JavaScript regex
87
- }
88
-
89
- while i < html.length
90
- break if stack.empty?
91
-
92
- last_char = curr_char unless [' ', '\n'].include?(curr_char)
93
- curr_char = html[i]
94
- curr_context = stack.last
95
-
96
- # If we've reached a context closer, we can remove an element off the stack
97
- if curr_char == context_closers[curr_context]
98
- stack.pop
99
- i += 1
100
- next
101
- end
102
- # Strings and regex expressions require special context handling because they can contain
103
- # context openers *and* closers
104
- if ['"', '/'].include?(curr_context)
105
- # If there's a backslash in a string or regex expression, we skip a character
106
- if curr_char == '\\'
107
- i += 2
108
- next
109
- end
110
- else
111
- # Non-string contexts are when we need to look for context openers.
112
- if context_closers.keys.include?(curr_char)
113
- # Slash starts a regular expression depending on context
114
- unless curr_char == '/' && ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
115
- stack << curr_char
116
- end
117
- end
118
- end
119
-
120
- i += 1
121
- end
122
-
123
- full_obj = html[0..(i - 1)]
124
- full_obj
125
- end
126
-
127
- def throttling_array_split(js_array)
128
- results = []
129
- curr_substring = js_array[1..-1]
130
-
131
- comma_regex = /,/
132
- func_regex = /function\([^)]*\)/
133
-
134
- until curr_substring.empty?
135
- if curr_substring.start_with?('function')
136
- match = func_regex.match(curr_substring)
137
- match_start = match.begin(0)
138
- match_end = match.end(0)
139
-
140
- function_text = find_object_from_startpoint(curr_substring, match_end)
141
- full_function_def = curr_substring[0, match_end + function_text.length]
142
- results << full_function_def
143
- curr_substring = curr_substring[full_function_def.length + 1..-1]
144
- else
145
- match = comma_regex.match(curr_substring)
146
-
147
- begin
148
- match_start = match.begin(0)
149
- match_end = match.end(0)
150
- rescue NoMethodError
151
- match_start = curr_substring.length - 1
152
- match_end = match_start + 1
153
- end
154
-
155
- curr_el = curr_substring[0, match_start]
156
- results << curr_el
157
- curr_substring = curr_substring[match_end..-1]
158
- end
159
- end
160
-
161
- results
162
- end
163
- end
164
- end
1
+ module RubyTube
2
+ module Parser
3
+ module_function
4
+
5
+ def parse_for_object(html, preceding_regex)
6
+ regex = Regexp.new(preceding_regex)
7
+ result = regex.match(html)
8
+
9
+ if result.nil?
10
+ raise HTMLParseError, "No matches for regex #{preceding_regex}"
11
+ end
12
+ start_index = result.end(0)
13
+
14
+ return parse_for_object_from_startpoint(html, start_index)
15
+ end
16
+
17
+ def find_object_from_startpoint(html, start_point)
18
+ html = html[start_point..-1]
19
+ unless ['{', '['].include?(html[0])
20
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
21
+ end
22
+
23
+ last_char = '{'
24
+ curr_char = nil
25
+ stack = [html[0]]
26
+ i = 1
27
+
28
+ context_closers = {
29
+ '{' => '}',
30
+ '[' => ']',
31
+ '"' => '"',
32
+ '/' => '/',
33
+ }
34
+
35
+ while i < html.length
36
+ break if stack.empty?
37
+
38
+ last_char = curr_char unless [' ', '\n'].include?(curr_char)
39
+ curr_char = html[i]
40
+ curr_context = stack.last
41
+
42
+ if curr_char == context_closers[curr_context]
43
+ stack.pop
44
+ i += 1
45
+ next
46
+ end
47
+
48
+ if ['"', '/'].include?(curr_context)
49
+ if curr_char == '\\'
50
+ i += 2
51
+ next
52
+ end
53
+ else
54
+ if context_closers.keys.include?(curr_char)
55
+ unless curr_char == '/' && !['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
56
+ stack.push(curr_char)
57
+ end
58
+ end
59
+ end
60
+
61
+ i += 1
62
+ end
63
+
64
+ full_obj = html[0...i]
65
+ full_obj
66
+ end
67
+
68
+ def parse_for_object_from_startpoint(html, start_point)
69
+ html = html[start_point..-1]
70
+
71
+ unless ['{', '['].include?(html[0])
72
+ raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
73
+ end
74
+
75
+ # First letter MUST be an open brace, so we put that in the stack,
76
+ # and skip the first character.
77
+ last_char = '{'
78
+ curr_char = nil
79
+ stack = [html[0]]
80
+ i = 1
81
+
82
+ context_closers = {
83
+ '{' => '}',
84
+ '[' => ']',
85
+ '"' => '"',
86
+ '/' => '/' # JavaScript regex
87
+ }
88
+
89
+ while i < html.length
90
+ break if stack.empty?
91
+
92
+ last_char = curr_char unless [' ', '\n'].include?(curr_char)
93
+ curr_char = html[i]
94
+ curr_context = stack.last
95
+
96
+ # If we've reached a context closer, we can remove an element off the stack
97
+ if curr_char == context_closers[curr_context]
98
+ stack.pop
99
+ i += 1
100
+ next
101
+ end
102
+ # Strings and regex expressions require special context handling because they can contain
103
+ # context openers *and* closers
104
+ if ['"', '/'].include?(curr_context)
105
+ # If there's a backslash in a string or regex expression, we skip a character
106
+ if curr_char == '\\'
107
+ i += 2
108
+ next
109
+ end
110
+ else
111
+ # Non-string contexts are when we need to look for context openers.
112
+ if context_closers.keys.include?(curr_char)
113
+ # Slash starts a regular expression depending on context
114
+ unless curr_char == '/' && ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].include?(last_char)
115
+ stack << curr_char
116
+ end
117
+ end
118
+ end
119
+
120
+ i += 1
121
+ end
122
+
123
+ full_obj = html[0..(i - 1)]
124
+ full_obj
125
+ end
126
+
127
+ def throttling_array_split(js_array)
128
+ results = []
129
+ curr_substring = js_array[1..-1]
130
+
131
+ comma_regex = /,/
132
+ func_regex = /function\([^)]*\)/
133
+
134
+ until curr_substring.empty?
135
+ if curr_substring.start_with?('function')
136
+ match = func_regex.match(curr_substring)
137
+ match_start = match.begin(0)
138
+ match_end = match.end(0)
139
+
140
+ function_text = find_object_from_startpoint(curr_substring, match_end)
141
+ full_function_def = curr_substring[0, match_end + function_text.length]
142
+ results << full_function_def
143
+ curr_substring = curr_substring[full_function_def.length + 1..-1]
144
+ else
145
+ match = comma_regex.match(curr_substring)
146
+
147
+ begin
148
+ match_start = match.begin(0)
149
+ match_end = match.end(0)
150
+ rescue NoMethodError
151
+ match_start = curr_substring.length - 1
152
+ match_end = match_start + 1
153
+ end
154
+
155
+ curr_el = curr_substring[0, match_start]
156
+ results << curr_el
157
+ curr_substring = curr_substring[match_end..-1]
158
+ end
159
+ end
160
+
161
+ results
162
+ end
163
+ end
164
+ end
@@ -1,75 +1,75 @@
1
module RubyTube
  # Thin HTTP layer on top of Faraday: simple verbs plus a ranged downloader.
  module Request
    module_function

    # Number of bytes requested per range request in {stream}.
    DEFAULT_RANGE_SIZE = 9437184

    # Performs a GET request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    def get(url, options = {})
      send(:get, url, options).body
    end

    # Performs a POST request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    def post(url, options = {})
      send(:post, url, options).body
    end

    # Performs a HEAD request and returns the response headers.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads +url+ in ranges of DEFAULT_RANGE_SIZE bytes, yielding the body
    # of each range response to the block.
    #
    # @param url [String] URL that already carries a query string ("&range="
    #   is appended to it)
    # @param timeout [Integer] per-request timeout in seconds
    # @param max_retries [Integer] extra attempts allowed after a timeout
    # @yieldparam chunk [String] body of one range response
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        range_url = "#{url}&range=#{downloaded}-#{stop_pos}"

        attempts = 0
        begin
          attempts += 1
          response = send(:get, range_url, { timeout: timeout })
        rescue Faraday::TimeoutError
          # 1 + max_retries attempts in total, as before.
          raise MaxRetriesExceeded if attempts > max_retries

          retry
        end

        if file_size == DEFAULT_RANGE_SIZE
          begin
            # NOTE(review): this is a full GET, so the response body is fetched
            # as well; a HEAD request may be cheaper - confirm the server
            # supports it before switching.
            probe = send(:get, "#{url}&range=0-99999999999", { timeout: timeout })
            file_size = probe.headers['Content-Length'].to_i
          rescue StandardError
            # Best effort: keep the current estimate when the size probe fails.
          end
        end

        body = response.body.to_s
        # An empty range would never advance +downloaded+; stop instead of
        # requesting the same range forever.
        break if body.empty?

        # Count bytes (not characters) so progress matches the byte ranges.
        downloaded += body.bytesize
        yield body
      end
    end

    # Builds a Faraday connection for +url+ and performs +method+ on it.
    # Note: this intentionally shadows Object#send inside this module.
    #
    # @param method [Symbol] HTTP verb understood by the Faraday connection
    # @param url [String]
    # @param options [Hash]
    # @option options [Hash] :headers merged over the default headers
    # @option options [Hash] :query request query parameters
    # @option options [Object] :data request payload, serialised as JSON
    # @option options [Numeric] :timeout request timeout in seconds
    # @return the Faraday response
    def send(method, url, options = {})
      headers = { 'Content-Type': 'text/html' }
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end
  end
end
1
module RubyTube
  # Thin HTTP layer on top of Faraday: simple verbs plus a ranged downloader.
  module Request
    module_function

    # Number of bytes requested per range request in {stream}.
    DEFAULT_RANGE_SIZE = 9437184

    # Performs a GET request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    def get(url, options = {})
      send(:get, url, options).body
    end

    # Performs a POST request and returns the response body.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    def post(url, options = {})
      send(:post, url, options).body
    end

    # Performs a HEAD request and returns the response headers.
    #
    # @param url [String]
    # @param options [Hash] see {send}
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads +url+ in ranges of DEFAULT_RANGE_SIZE bytes, yielding the body
    # of each range response to the block.
    #
    # @param url [String] URL that already carries a query string ("&range="
    #   is appended to it)
    # @param timeout [Integer] per-request timeout in seconds
    # @param max_retries [Integer] extra attempts allowed after a timeout
    # @yieldparam chunk [String] body of one range response
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        range_url = "#{url}&range=#{downloaded}-#{stop_pos}"

        attempts = 0
        begin
          attempts += 1
          response = send(:get, range_url, { timeout: timeout })
        rescue Faraday::TimeoutError
          # 1 + max_retries attempts in total, as before.
          raise MaxRetriesExceeded if attempts > max_retries

          retry
        end

        if file_size == DEFAULT_RANGE_SIZE
          begin
            # NOTE(review): this is a full GET, so the response body is fetched
            # as well; a HEAD request may be cheaper - confirm the server
            # supports it before switching.
            probe = send(:get, "#{url}&range=0-99999999999", { timeout: timeout })
            file_size = probe.headers['Content-Length'].to_i
          rescue StandardError
            # Best effort: keep the current estimate when the size probe fails.
          end
        end

        body = response.body.to_s
        # An empty range would never advance +downloaded+; stop instead of
        # requesting the same range forever.
        break if body.empty?

        # Count bytes (not characters) so progress matches the byte ranges.
        downloaded += body.bytesize
        yield body
      end
    end

    # Builds a Faraday connection for +url+ and performs +method+ on it.
    # Note: this intentionally shadows Object#send inside this module.
    #
    # @param method [Symbol] HTTP verb understood by the Faraday connection
    # @param url [String]
    # @param options [Hash]
    # @option options [Hash] :headers merged over the default headers
    # @option options [Hash] :query request query parameters
    # @option options [Object] :data request payload, serialised as JSON
    # @option options [Numeric] :timeout request timeout in seconds
    # @return the Faraday response
    def send(method, url, options = {})
      headers = { 'Content-Type': 'text/html' }
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end
  end
end
@@ -1,97 +1,97 @@
1
module RubyTube
  # One downloadable media stream, built from a stream description hash.
  class Stream
    attr_accessor(
      :monostate,
      :url,
      :itag,
      :mime_type,
      :codecs,
      :type,
      :subtype,
      :file_size,
      :is_otf,
      :bitrate
    )

    # @param stream [Hash] description with string keys; reads 'url', 'itag',
    #   'mimeType', 'is_otf', 'bitrate' and the optional 'contentLength'
    # @param monostate [#title] shared state object; only +title+ is used here
    def initialize(stream, monostate)
      self.monostate = monostate

      self.url = stream['url']
      self.itag = stream['itag'].to_i

      self.mime_type, self.codecs = Extractor.mime_type_codec(stream['mimeType'])
      self.type, self.subtype = mime_type.split('/')

      self.is_otf = stream['is_otf']
      self.bitrate = stream['bitrate']

      # A missing 'contentLength' is recorded as 0.
      self.file_size = stream.fetch('contentLength', 0).to_i
    end

    # Writes the stream to disk and returns the path of the file.
    #
    # An existing file at the target path is returned untouched. When the
    # download fails, the partially written file is removed so that a later
    # call does not mistake it for a finished download.
    #
    # @param filename [String, nil] file name; defaults to "<title>.<subtype>"
    # @param output_dir [String, nil] passed to Utils.target_directory
    # @return [String] path of the downloaded (or pre-existing) file
    # @raise [HTTPError] for any HTTP error other than 404
    def download(filename: nil, output_dir: nil)
      file_path = get_file_path(filename, output_dir)
      return file_path if File.exist?(file_path)

      finished = false
      begin
        File.open(file_path, 'wb') do |f|
          Request.stream(url) { |chunk| f.write(chunk) }
        end
        finished = true
      rescue HTTPError => e
        raise unless e.code == 404

        # A 404 is tolerated, as before: keep whatever was written.
        finished = true
      ensure
        File.delete(file_path) if !finished && File.exist?(file_path)
      end

      file_path
    end

    # @return [Boolean] true when the MIME type is "audio/..."
    def is_audio?
      type == 'audio'
    end

    # @return [Boolean] true when the MIME type is "video/..."
    def is_video?
      type == 'video'
    end

    # @return [Boolean] true when the stream lists an odd number of codecs
    def is_adaptive?
      codecs.size.odd?
    end

    # @return [Boolean] the opposite of {#is_adaptive?}
    def is_progressive?
      !is_adaptive?
    end

    # @return the title reported by +monostate+
    def title
      monostate.title
    end

    # @return the resolution reported by the StreamFormat for this itag
    def resolution
      stream_format.resolution
    end

    private

    # Joins the target directory with "<prefix><filename>".
    def get_file_path(filename, output_dir, prefix = '')
      filename ||= default_filename
      File.join(Utils.target_directory(output_dir), "#{prefix}#{filename}")
    end

    # "<title>.<subtype>" with path separators (and NUL) in the title replaced
    # by "_", so the title can never address another directory.
    def default_filename
      unsafe = [File::SEPARATOR, File::ALT_SEPARATOR, "\0"].compact
      safe_title = unsafe.reduce(monostate.title.to_s) { |name, char| name.gsub(char, '_') }
      "#{safe_title}.#{subtype}"
    end

    # Memoised StreamFormat lookup for this stream's itag.
    def stream_format
      @stream_format ||= StreamFormat.new(itag)
    end
  end
end
1
module RubyTube
  # One downloadable media stream, built from a stream description hash.
  class Stream
    attr_accessor(
      :monostate,
      :url,
      :itag,
      :mime_type,
      :codecs,
      :type,
      :subtype,
      :file_size,
      :is_otf,
      :bitrate
    )

    # @param stream [Hash] description with string keys; reads 'url', 'itag',
    #   'mimeType', 'is_otf', 'bitrate' and the optional 'contentLength'
    # @param monostate [#title] shared state object; only +title+ is used here
    def initialize(stream, monostate)
      self.monostate = monostate

      self.url = stream['url']
      self.itag = stream['itag'].to_i

      self.mime_type, self.codecs = Extractor.mime_type_codec(stream['mimeType'])
      self.type, self.subtype = mime_type.split('/')

      self.is_otf = stream['is_otf']
      self.bitrate = stream['bitrate']

      # A missing 'contentLength' is recorded as 0.
      self.file_size = stream.fetch('contentLength', 0).to_i
    end

    # Writes the stream to disk and returns the path of the file.
    #
    # An existing file at the target path is returned untouched. When the
    # download fails, the partially written file is removed so that a later
    # call does not mistake it for a finished download.
    #
    # @param filename [String, nil] file name; defaults to "<title>.<subtype>"
    # @param output_dir [String, nil] passed to Utils.target_directory
    # @return [String] path of the downloaded (or pre-existing) file
    # @raise [HTTPError] for any HTTP error other than 404
    def download(filename: nil, output_dir: nil)
      file_path = get_file_path(filename, output_dir)
      return file_path if File.exist?(file_path)

      finished = false
      begin
        File.open(file_path, 'wb') do |f|
          Request.stream(url) { |chunk| f.write(chunk) }
        end
        finished = true
      rescue HTTPError => e
        raise unless e.code == 404

        # A 404 is tolerated, as before: keep whatever was written.
        finished = true
      ensure
        File.delete(file_path) if !finished && File.exist?(file_path)
      end

      file_path
    end

    # @return [Boolean] true when the MIME type is "audio/..."
    def is_audio?
      type == 'audio'
    end

    # @return [Boolean] true when the MIME type is "video/..."
    def is_video?
      type == 'video'
    end

    # @return [Boolean] true when the stream lists an odd number of codecs
    def is_adaptive?
      codecs.size.odd?
    end

    # @return [Boolean] the opposite of {#is_adaptive?}
    def is_progressive?
      !is_adaptive?
    end

    # @return the title reported by +monostate+
    def title
      monostate.title
    end

    # @return the resolution reported by the StreamFormat for this itag
    def resolution
      stream_format.resolution
    end

    private

    # Joins the target directory with "<prefix><filename>".
    def get_file_path(filename, output_dir, prefix = '')
      filename ||= default_filename
      File.join(Utils.target_directory(output_dir), "#{prefix}#{filename}")
    end

    # "<title>.<subtype>" with path separators (and NUL) in the title replaced
    # by "_", so the title can never address another directory.
    def default_filename
      unsafe = [File::SEPARATOR, File::ALT_SEPARATOR, "\0"].compact
      safe_title = unsafe.reduce(monostate.title.to_s) { |name, char| name.gsub(char, '_') }
      "#{safe_title}.#{subtype}"
    end

    # Memoised StreamFormat lookup for this stream's itag.
    def stream_format
      @stream_format ||= StreamFormat.new(itag)
    end
  end
end