http_utilities 1.1.2 → 1.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -9
- data/http_utilities.gemspec +10 -17
- data/lib/http_utilities/http/client.rb +62 -8
- data/lib/http_utilities/http/mechanize/client.rb +5 -1
- data/lib/http_utilities/http/proxy_support.rb +10 -0
- data/lib/http_utilities/http/request.rb +2 -3
- data/lib/http_utilities/http/response.rb +15 -4
- data/lib/http_utilities/http/user_agent.rb +1 -1
- data/lib/http_utilities/proxies/mongo/proxy_module.rb +97 -0
- data/lib/http_utilities/proxies/mysql/proxy_module.rb +93 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +34 -66
- data/lib/http_utilities/proxies/proxy_seeder.rb +13 -11
- data/lib/http_utilities.rb +9 -17
- data/spec/http_utilities/client_spec.rb +5 -107
- data/spec/spec_helper.rb +3 -5
- metadata +21 -41
- data/lib/http_utilities/http/adapters/curb.rb +0 -107
- data/lib/http_utilities/http/adapters/net_http.rb +0 -135
- data/lib/http_utilities/http/adapters/open_uri.rb +0 -46
- data/lib/http_utilities/http/cookies.rb +0 -49
- data/lib/http_utilities/http/format.rb +0 -26
- data/lib/http_utilities/http/get.rb +0 -67
- data/lib/http_utilities/http/post.rb +0 -32
- data/lib/http_utilities/proxies/proxy_module.rb +0 -89
@@ -1,49 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Http
    # Helpers for pulling cookies out of a response and attaching them to the
    # headers of a subsequent request.
    module Cookies
      # Extracts the cookies carried by a response.
      #
      # * For a Net::HTTPResponse every Set-Cookie header is reduced to the
      #   part before the first ";" (its attributes are dropped).
      # * For an HttpUtilities::Http::Response the cookies already stored on
      #   the originating request are returned.
      #
      # @param response [Net::HTTPResponse, HttpUtilities::Http::Response, nil]
      # @return [Array<String>, Object, nil] nil when there is no response,
      #   the response type is not recognised, or no cookies are present
      def handle_cookies(response)
        return nil unless response

        if response.is_a?(Net::HTTPResponse)
          cookie_fields = response.get_fields('Set-Cookie')
          return nil unless cookie_fields && cookie_fields.any?

          # Split on ";" with optional trailing whitespace so that both
          # "a=1; Path=/" and "a=1;Path=/" yield "a=1".
          cookie_fields.map { |cookie| cookie.split(/;\s*/).first }
        elsif response.is_a?(HttpUtilities::Http::Response)
          request = response.request
          request && request.cookies ? request.cookies : nil
        end
      end

      # Joins cookies into a single header value; every cookie is followed
      # by "; " (including the last one).
      #
      # @param cookies [Array<String>, nil]
      # @return [String] empty string when there are no cookies
      def format_cookies(cookies)
        Array(cookies).each_with_object(''.dup) do |cookie, cookie_string|
          cookie_string << "#{cookie}; "
        end
      end

      # Returns headers with a 'cookie' entry added when cookies is a
      # non-empty Array; otherwise headers is returned untouched. The headers
      # argument itself is never modified.
      #
      # @param headers [Hash, nil]
      # @param cookies [Array<String>, nil]
      # @return [Hash, nil]
      def set_cookies(headers, cookies)
        return headers unless cookies.is_a?(Array) && cookies.any?

        cookie_hash = { 'cookie' => format_cookies(cookies) }
        headers && !headers.empty? ? headers.merge(cookie_hash) : cookie_hash
      end
    end
  end
end
|
49
|
-
|
@@ -1,26 +0,0 @@
|
|
1
|
-
# -*- encoding : utf-8 -*-
|
2
|
-
require 'json'
require 'nokogiri'
|
3
|
-
|
4
|
-
module HttpUtilities
  module Http
    # Body-parsing helpers. The including object must respond to +body+ and
    # +parsed_body=+; each method stores its result in +parsed_body+ and
    # returns it. A nil or empty body always yields nil.
    module Format
      # Parses the body with Nokogiri::HTML, passing "utf-8" as the encoding.
      def as_html
        self.parsed_body = parseable_body? ? Nokogiri::HTML(body, nil, 'utf-8') : nil
      end

      # Parses the body with Nokogiri::XML, passing "utf-8" as the encoding.
      def as_xml
        self.parsed_body = parseable_body? ? Nokogiri::XML(body, nil, 'utf-8') : nil
      end

      # Parses the body with MultiXml.parse.
      def as_multi_xml
        self.parsed_body = parseable_body? ? MultiXml.parse(body) : nil
      end

      # Parses the body as JSON.
      #
      # The previous implementation called +to_json+ on the body, which
      # re-serialises the raw string instead of decoding it.
      #
      # @return [Object, nil] the decoded JSON document
      # @raise [JSON::ParserError] when the body is not valid JSON
      def as_json
        self.parsed_body = parseable_body? ? JSON.parse(body) : nil
      end

      private

      # True when there is a non-nil, non-empty body to hand to a parser.
      def parseable_body?
        content = body
        content && content != ''
      end
    end
  end
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Http
    # GET helpers. The including object supplies the adapter methods
    # +retrieve_open_uri_content+, +retrieve_net_http_content+ and
    # +retrieve_curl_content+, each taking (url, options).
    #
    # None of these methods modify the options hash passed in by the caller;
    # they work on a copy.
    module Get
      # Fetches url and returns whatever the selected adapter returns.
      def retrieve_raw_content(url, options = {})
        retrieve_content_from_url(url, options)
      end

      # Same as #retrieve_raw_content; no format is forced.
      def retrieve_raw_xml(url, options = {})
        retrieve_content_from_url(url, options)
      end

      # Fetches url with :force_encoding => true and :format => :xml.
      def retrieve_parsed_xml(url, options = {})
        retrieve_content_from_url(url, options.merge(force_encoding: true, format: :xml))
      end

      # Fetches url with :force_encoding => true and :format => :html.
      def retrieve_parsed_html(url, options = {})
        retrieve_content_from_url(url, options.merge(force_encoding: true, format: :html))
      end

      # HTML variant of #retrieve_raw_content_and_fallback_to_proxies.
      def retrieve_parsed_html_and_fallback_to_proxies(url, options = {})
        retrieve_raw_content_and_fallback_to_proxies(url, options.merge(force_encoding: true, format: :html))
      end

      # XML variant of #retrieve_raw_content_and_fallback_to_proxies.
      def retrieve_parsed_xml_and_fallback_to_proxies(url, options = {})
        retrieve_raw_content_and_fallback_to_proxies(url, options.merge(force_encoding: true, format: :xml))
      end

      # Fetches url once without touching :use_proxy; while the result is
      # falsy, retries with :use_proxy => true up to
      # options[:maximum_retrieval_retries] times (default 5).
      #
      # @param url [String]
      # @param options [Hash] :maximum_retrieval_retries is consumed here and
      #   not forwarded to the adapter
      # @return [Object, nil] the first truthy adapter result, or the last
      #   falsy one when every attempt failed
      def retrieve_raw_content_and_fallback_to_proxies(url, options = {})
        # merge returns a new hash, so the caller's options stay untouched.
        options = options.merge(force_encoding: true)
        max_retries = options.delete(:maximum_retrieval_retries) { 5 }

        response = retrieve_content_from_url(url, options)
        retries = 0

        while !response && retries < max_retries
          options[:use_proxy] = true
          response = retrieve_content_from_url(url, options)
          retries += 1
        end

        response
      end

      # Dispatches to the adapter selected by options[:method]
      # (:open_uri, :net_http or :curl; defaults to :net_http).
      #
      # @return [Object, nil] the adapter's result, or nil for an
      #   unrecognised :method
      def retrieve_content_from_url(url, options = {})
        case options[:method] || :net_http
        when :open_uri then retrieve_open_uri_content(url, options)
        when :net_http then retrieve_net_http_content(url, options)
        when :curl     then retrieve_curl_content(url, options)
        end
      end
    end
  end
end
|
67
|
-
|
@@ -1,32 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Http
    # POST helpers. The including object supplies the adapter methods
    # +post_and_retrieve_content_using_net_http+ and
    # +post_and_retrieve_content_using_curl+, each taking (url, data, options).
    #
    # None of these methods modify the options hash passed in by the caller;
    # they work on a copy.
    module Post
      # Posts data with :force_encoding => true and :format => :html.
      def post_and_retrieve_parsed_html(url, data, options = {})
        post_and_retrieve_content(url, data, options.merge(force_encoding: true, format: :html))
      end

      # Posts data with :force_encoding => true and :format => :xml.
      def post_and_retrieve_parsed_xml(url, data, options = {})
        post_and_retrieve_content(url, data, options.merge(force_encoding: true, format: :xml))
      end

      # Dispatches to the adapter selected by options[:method]
      # (:net_http or :curl; defaults to :net_http).
      #
      # @param url [String]
      # @param data [Object] payload handed to the adapter as-is
      # @param options [Hash] :response_only is stripped before the adapter
      #   is called
      # @return [Object, nil] the adapter's result, or nil for an
      #   unrecognised :method
      def post_and_retrieve_content(url, data, options = {})
        options = options.dup
        # The value was never used here; the key is only removed so the
        # adapters do not receive it.
        options.delete(:response_only)

        case options[:method] || :net_http
        when :net_http then post_and_retrieve_content_using_net_http(url, data, options)
        when :curl     then post_and_retrieve_content_using_curl(url, data, options)
        end
      end
    end
  end
end
|
32
|
-
|
@@ -1,89 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Proxies
    # Mixin for the ::Proxy model. Including it extends the host class with
    # ClassMethods (query and formatting helpers) and includes
    # InstanceMethods (per-record formatting helpers).
    module ProxyModule
      def self.included(base)
        base.send :extend, ClassMethods
        base.send :include, InstanceMethods
      end

      module ClassMethods
        # Adapters whose SQL dialect spells the random function RANDOM();
        # every other adapter is given RAND().
        RANDOM_FUNCTION_ADAPTERS = %w[
          ActiveRecord::ConnectionAdapters::SQLite3Adapter
          ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
        ].freeze

        # Proxies that are due for a check: never checked or last checked
        # before +date+, with at most +maximum_failed_attempts+ failures,
        # ordered by valid_proxy, failed_attempts and last_checked_at
        # (all ascending) and capped at +limit+ rows.
        #
        # @return [Object] the chained query built on ::Proxy
        def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
          get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
            .where(['(last_checked_at IS NULL OR last_checked_at < ?)', date])
            .where(['failed_attempts <= ?', maximum_failed_attempts])
            .order('valid_proxy ASC, failed_attempts ASC, last_checked_at ASC')
            .limit(limit)
        end

        # Picks one random proxy that is marked valid and has been checked at
        # least once, optionally capped by +maximum_failed_attempts+.
        #
        # @return [Object, nil] a single record, or nil when none match
        def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil)
          proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
          proxies = proxies.where(['valid_proxy = ? AND last_checked_at IS NOT NULL', true])
          proxies = proxies.where(['failed_attempts <= ?', maximum_failed_attempts]) if maximum_failed_attempts

          # PostgreSQL has no RAND(); it was previously routed to the MySQL
          # spelling by the fallback branch.
          adapter_name = ActiveRecord::Base.connection.class.name
          order_clause = RANDOM_FUNCTION_ADAPTERS.include?(adapter_name) ? 'RANDOM() DESC' : 'RAND() DESC'
          proxies = proxies.order(order_clause)

          proxy = nil
          uncached do
            proxy = proxies.limit(1).first
          end
          proxy
        end

        # Base query on ::Proxy, narrowed by protocol and/or proxy type.
        # A nil value or :all / "all" (any case) leaves that dimension
        # unfiltered.
        def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
          proxies = ::Proxy.where(nil)
          proxies = proxies.where(protocol: protocol) if protocol && !protocol.to_s.downcase.to_sym.eql?(:all)
          proxies = proxies.where(proxy_type: proxy_type) if proxy_type && !proxy_type.to_s.downcase.to_sym.eql?(:all)
          proxies
        end

        # Builds "host:port", prefixed with "http://" when +include_http+ is
        # true and the result does not already start with it.
        #
        # @return [String]
        def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
          proxy_address = "#{proxy_host}:#{proxy_port}"
          proxy_address.insert(0, 'http://') if include_http && !proxy_address.start_with?('http://')
          proxy_address
        end

        # @return [String] "username:password"
        def format_proxy_credentials(username, password)
          "#{username}:#{password}"
        end
      end

      module InstanceMethods
        # @return [String] this record's host and port formatted by
        #   ::Proxy.format_proxy_address
        def proxy_address(include_http = false)
          ::Proxy.format_proxy_address(host, port, include_http)
        end

        # @return [String] "username:password" for this record
        def proxy_credentials
          ::Proxy.format_proxy_credentials(username, password)
        end

        # Credentials hash for a SOCKS connection.
        #
        # @return [Hash] {} without a username, {user:} when only the
        #   username is set, {user:, password:} when both are set. nil is
        #   treated like an empty string.
        def socks_proxy_credentials
          return {} if username.to_s.empty?
          return { user: username } if password.to_s.empty?

          { user: username, password: password }
        end
      end
    end
  end
end
|