rubytube 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,164 +1,164 @@
module RubyTube
  # Helpers for carving JavaScript object/array literals (and function bodies)
  # out of a larger chunk of page source or player JavaScript.
  module Parser
    module_function

    # Opening delimiter => closing delimiter for every context the scanner tracks.
    CONTEXT_CLOSERS = {
      '{' => '}',
      '[' => ']',
      '"' => '"',
      '/' => '/' # JavaScript regex literal
    }.freeze

    # Contexts whose content is opaque: delimiters inside them are not structural.
    OPAQUE_CONTEXTS = ['"', '/'].freeze

    # A slash only opens a regex literal when the previous significant
    # character is one of these; after anything else it is a division.
    REGEX_PRECEDERS = ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].freeze

    # Characters ignored when remembering the previous significant character.
    # The newline must be double-quoted: '\n' would be a backslash plus "n".
    INSIGNIFICANT_CHARS = [' ', "\n"].freeze

    private_constant :CONTEXT_CLOSERS, :OPAQUE_CONTEXTS, :REGEX_PRECEDERS, :INSIGNIFICANT_CHARS

    # Finds +preceding_regex+ in +html+ and returns the balanced object/array
    # literal that starts immediately after the match.
    #
    # html            - String to search.
    # preceding_regex - String or Regexp describing the text right before the object.
    #
    # Returns the literal as a String.
    # Raises HTMLParseError when the pattern does not match or the match is not
    # followed by "{" or "[".
    def parse_for_object(html, preceding_regex)
      result = Regexp.new(preceding_regex).match(html)
      raise HTMLParseError, "No matches for regex #{preceding_regex}" if result.nil?

      parse_for_object_from_startpoint(html, result.end(0))
    end

    # Returns the balanced "{...}" / "[...]" text starting at +start_point+.
    #
    # Raises HTMLParseError when html[start_point] is not "{" or "[".
    def find_object_from_startpoint(html, start_point)
      scan_balanced(html, start_point)
    end

    # Same contract as find_object_from_startpoint; kept as a separate entry
    # point because callers use both names.
    def parse_for_object_from_startpoint(html, start_point)
      scan_balanced(html, start_point)
    end

    # Splits the text of a JavaScript array literal ("[a,function(x){...},b]")
    # into its top-level elements. Elements starting with "function(...)" are
    # kept whole even when their body contains commas.
    #
    # Returns an Array of Strings (element source text, not evaluated).
    def throttling_array_split(js_array)
      results = []
      # Drop the leading "["; an empty input simply yields no elements.
      curr_substring = js_array[1..-1].to_s
      func_regex = /function\([^)]*\)/

      until curr_substring.empty?
        func_match = curr_substring.start_with?('function') ? func_regex.match(curr_substring) : nil

        if func_match
          signature_end = func_match.end(0)
          function_body = find_object_from_startpoint(curr_substring, signature_end)
          full_function_def = curr_substring[0, signature_end + function_body.length]
          results << full_function_def
          # +1 skips the separator after the function; slicing past the end
          # returns nil in Ruby, so normalise to an empty string.
          curr_substring = curr_substring[(full_function_def.length + 1)..-1].to_s
        else
          comma_index = curr_substring.index(',')

          if comma_index
            results << curr_substring[0, comma_index]
            curr_substring = curr_substring[(comma_index + 1)..-1]
          else
            # Last element: everything except the final character (the closing "]").
            results << curr_substring[0, curr_substring.length - 1]
            curr_substring = ''
          end
        end
      end

      results
    end

    # Shared scanner behind both *_from_startpoint entry points. Walks +html+
    # from +start_point+, tracking nested braces, brackets, double-quoted
    # strings and regex literals on a stack, and stops once the opening
    # delimiter has been closed.
    def scan_balanced(html, start_point)
      html = html[start_point..-1]

      unless html && ['{', '['].include?(html[0])
        raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html.to_s[0..19]}"
      end

      # The opener counts as the previous significant character, so a slash
      # right after it is recognised as the start of a regex literal.
      last_char = html[0]
      curr_char = nil
      stack = [html[0]]
      i = 1

      while i < html.length && !stack.empty?
        last_char = curr_char unless curr_char.nil? || INSIGNIFICANT_CHARS.include?(curr_char)
        curr_char = html[i]
        curr_context = stack.last

        if curr_char == CONTEXT_CLOSERS[curr_context]
          # Reached the closer of the innermost context.
          stack.pop
        elsif OPAQUE_CONTEXTS.include?(curr_context)
          # Inside a string or regex a backslash escapes the next character.
          i += 1 if curr_char == '\\'
        elsif CONTEXT_CLOSERS.key?(curr_char)
          opens_context = curr_char != '/' || REGEX_PRECEDERS.include?(last_char)
          stack.push(curr_char) if opens_context
        end

        i += 1
      end

      html[0...i]
    end

    private_class_method :scan_balanced
  end
end
@@ -1,75 +1,75 @@
module RubyTube
  # Thin HTTP layer on top of Faraday used by the rest of the library.
  module Request
    module_function

    # Number of bytes requested per range when streaming (9 MiB).
    DEFAULT_RANGE_SIZE = 9_437_184

    # Performs a GET request and returns the response body.
    def get(url, options = {})
      send(:get, url, options).body
    end

    # Performs a POST request and returns the response body.
    def post(url, options = {})
      send(:post, url, options).body
    end

    # Performs a HEAD request and returns the response headers.
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads +url+ in ranges of DEFAULT_RANGE_SIZE bytes, yielding the body
    # of each range response to the block as a String chunk.
    #
    # url         - String URL; "&range=a-b" is appended for every request.
    # timeout     - Per-request timeout in seconds, passed through to #send.
    # max_retries - Extra attempts allowed per range after a timeout.
    #
    # Raises MaxRetriesExceeded when a range keeps timing out; any other
    # transport error propagates unchanged.
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1
        response = fetch_range(url, downloaded, stop_pos, timeout, max_retries)

        # Until the real size is known the placeholder size is still in place;
        # keep it when the probe cannot determine a usable size.
        if file_size == DEFAULT_RANGE_SIZE
          file_size = probe_file_size(url, timeout) || file_size
        end

        chunk = response.body.to_s
        # An empty body can never advance the offset; stop instead of spinning.
        break if chunk.empty?

        # Offsets are byte positions, so count bytes rather than characters.
        downloaded += chunk.bytesize
        yield chunk
      end
    end

    # Builds a Faraday connection for +url+ and issues +method+ against it.
    #
    # options - Hash with optional keys:
    #           :headers - merged over the default Content-Type header
    #           :query   - request parameters
    #           :data    - object serialised with JSON.dump as the body
    #           :timeout - per-request timeout in seconds
    #
    # Returns the Faraday response.
    def send(method, url, options = {})
      headers = { 'Content-Type': 'text/html' }
      headers.merge!(options[:headers]) if options[:headers]

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
        req.options.timeout = options[:timeout] if options[:timeout]
      end
    end

    # Requests bytes start_pos..stop_pos of +url+, retrying after a timeout
    # until 1 + max_retries attempts have been made.
    def fetch_range(url, start_pos, stop_pos, timeout, max_retries)
      attempts = 0

      begin
        send(:get, "#{url}&range=#{start_pos}-#{stop_pos}", { timeout: timeout })
      rescue Faraday::TimeoutError
        attempts += 1
        raise MaxRetriesExceeded if attempts > max_retries

        retry
      end
    end

    # Best-effort lookup of the total size via the Content-Length of an
    # open-ended range request. Returns a positive Integer, or nil when the
    # request fails or the header is missing/unusable.
    #
    # NOTE(review): this is a full GET, so the adapter may read the whole body
    # just to obtain one header; consider a HEAD request if the server allows it.
    def probe_file_size(url, timeout)
      response = send(:get, "#{url}&range=0-99999999999", { timeout: timeout })
      size = response.headers['Content-Length'].to_i
      size.positive? ? size : nil
    rescue StandardError
      nil
    end

    private_class_method :fetch_range, :probe_file_size
  end
end
@@ -1,89 +1,97 @@
module RubyTube
  # One downloadable media stream of a video, built from a stream-description
  # hash (string keys such as 'url', 'itag', 'mimeType').
  class Stream
    attr_accessor(
      :monostate,
      :url,
      :itag,
      :mime_type,
      :codecs,
      :type,
      :subtype,
      :file_size,
      :is_otf,
      :bitrate
    )

    # stream    - Hash describing the stream.
    # monostate - object shared with the owning video; must respond to #title.
    def initialize(stream, monostate)
      self.monostate = monostate

      self.url = stream['url']
      self.itag = stream['itag'].to_i

      self.mime_type, self.codecs = Extractor.mime_type_codec(stream['mimeType'])
      self.type, self.subtype = mime_type.split('/')

      self.is_otf = stream['is_otf']
      self.bitrate = stream['bitrate']

      self.file_size = stream.fetch('contentLength', 0).to_i
    end

    # Writes the stream to disk and returns the file path.
    #
    # filename   - optional file name; defaults to "<title>.<subtype>".
    # output_dir - optional directory, resolved through Utils.target_directory.
    #
    # An existing file at the target path is returned untouched. If the
    # download fails, the partially written file is removed before the error
    # propagates, so a later call does not mistake it for a finished download.
    # An HTTPError with code 404 is tolerated and the file written so far is
    # kept.
    def download(filename: nil, output_dir: nil)
      file_path = get_file_path(filename, output_dir)

      return file_path if File.exist?(file_path)

      finished = false
      begin
        File.open(file_path, 'wb') do |f|
          Request.stream(url) { |chunk| f.write(chunk) }
        end
        finished = true
      rescue HTTPError => e
        raise unless e.code == 404

        finished = true
      ensure
        File.delete(file_path) if !finished && File.exist?(file_path)
      end

      file_path
    end

    # True when the MIME type's main type is "audio".
    def is_audio?
      type == 'audio'
    end

    # True when the MIME type's main type is "video".
    def is_video?
      type == 'video'
    end

    # True when the stream declares an odd number of codecs.
    def is_adaptive?
      codecs.size.odd?
    end

    # Opposite of #is_adaptive?.
    def is_progressive?
      !is_adaptive?
    end

    # Title taken from the shared monostate.
    def title
      monostate.title
    end

    # Resolution reported by the StreamFormat for this itag.
    def resolution
      stream_format.resolution
    end

    private

    # Joins the resolved output directory with the (optionally prefixed) file name.
    def get_file_path(filename, output_dir, prefix = '')
      filename = default_filename unless filename

      if prefix
        filename = "#{prefix}#{filename}"
      end

      output_path = Utils.target_directory(output_dir)
      File.join(output_path, filename)
    end

    # "<title>.<subtype>", with characters that cannot appear in a single path
    # component (path separator, NUL) replaced so the title never points into
    # a different directory.
    def default_filename
      safe_title = monostate.title.to_s.tr("/\0", '_')
      "#{safe_title}.#{subtype}"
    end

    # Memoised StreamFormat lookup for this stream's itag.
    def stream_format
      @stream_format ||= StreamFormat.new(itag)
    end
  end
end