http_utilities 1.1.2 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -9
- data/http_utilities.gemspec +10 -17
- data/lib/http_utilities/http/client.rb +62 -8
- data/lib/http_utilities/http/mechanize/client.rb +5 -1
- data/lib/http_utilities/http/proxy_support.rb +10 -0
- data/lib/http_utilities/http/request.rb +2 -3
- data/lib/http_utilities/http/response.rb +15 -4
- data/lib/http_utilities/http/user_agent.rb +1 -1
- data/lib/http_utilities/proxies/mongo/proxy_module.rb +97 -0
- data/lib/http_utilities/proxies/mysql/proxy_module.rb +93 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +34 -66
- data/lib/http_utilities/proxies/proxy_seeder.rb +13 -11
- data/lib/http_utilities.rb +9 -17
- data/spec/http_utilities/client_spec.rb +5 -107
- data/spec/spec_helper.rb +3 -5
- metadata +21 -41
- data/lib/http_utilities/http/adapters/curb.rb +0 -107
- data/lib/http_utilities/http/adapters/net_http.rb +0 -135
- data/lib/http_utilities/http/adapters/open_uri.rb +0 -46
- data/lib/http_utilities/http/cookies.rb +0 -49
- data/lib/http_utilities/http/format.rb +0 -26
- data/lib/http_utilities/http/get.rb +0 -67
- data/lib/http_utilities/http/post.rb +0 -32
- data/lib/http_utilities/proxies/proxy_module.rb +0 -89
@@ -1,49 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Http
    # Helpers for reading cookies from a response and writing them into
    # request headers.
    module Cookies

      # Extracts the cookies carried by +response+.
      #
      # * For a Net::HTTPResponse, every 'Set-Cookie' header is reduced to its
      #   leading "name=value" pair. Attributes are cut at the first ';'
      #   whether or not it is followed by whitespace, and blank pairs are
      #   skipped.
      # * For a HttpUtilities::Http::Response, the cookies of the request
      #   attached to the response are returned.
      #
      # Returns an Array of "name=value" strings for a Net::HTTPResponse, the
      # request's cookies for a HttpUtilities::Http::Response, or nil when
      # there is no response, no cookies, or the response type is unsupported.
      def handle_cookies(response)
        # Bail out first so the class constants below are never evaluated
        # for a nil response.
        return nil unless response

        if response.is_a?(Net::HTTPResponse)
          cookie_fields = response.get_fields('Set-Cookie')
          return nil unless cookie_fields && cookie_fields.any?

          cookie_fields.each_with_object([]) do |field, pairs|
            pair = field.to_s.split(';').first.to_s.strip
            pairs << pair unless pair.empty?
          end
        elsif response.is_a?(HttpUtilities::Http::Response)
          request = response.request
          (request && request.cookies) || nil
        end
      end

      # Joins +cookies+ into a single header value, appending "; " after
      # every entry (including the last one). Returns "" for nil or an empty
      # collection.
      def format_cookies(cookies)
        Array(cookies).each_with_object(''.dup) do |cookie, header_value|
          header_value << "#{cookie}; "
        end
      end

      # Returns +headers+ with a 'cookie' entry built from +cookies+.
      #
      # +headers+ is returned untouched unless +cookies+ is an Array for
      # which #any? is true. The given +headers+ hash is never mutated: a
      # merged copy is returned, or a new hash holding only the 'cookie'
      # entry when +headers+ is nil or empty.
      def set_cookies(headers, cookies)
        return headers unless cookies.is_a?(Array) && cookies.any?

        cookie_hash = { 'cookie' => format_cookies(cookies) }
        (headers && !headers.empty?) ? headers.merge(cookie_hash) : cookie_hash
      end

    end
  end
end
|
49
|
-
|
@@ -1,26 +0,0 @@
|
|
1
|
-
# -*- encoding : utf-8 -*-
|
2
|
-
require 'json'
require 'nokogiri'
|
3
|
-
|
4
|
-
module HttpUtilities
  module Http
    # Body parsers for an object that exposes +body+ and a +parsed_body=+
    # writer. Each +as_*+ method stores its result in +parsed_body+, or nil
    # when the body is nil or the empty string.
    module Format

      # Parses the body as HTML with Nokogiri, passing "utf-8" as encoding.
      def as_html
        self.parsed_body = format_body_present? ? Nokogiri::HTML(self.body, nil, "utf-8") : nil
      end

      # Parses the body as XML with Nokogiri, passing "utf-8" as encoding.
      def as_xml
        self.parsed_body = format_body_present? ? Nokogiri::XML(self.body, nil, "utf-8") : nil
      end

      # Parses the body with MultiXml.parse.
      def as_multi_xml
        self.parsed_body = format_body_present? ? MultiXml.parse(self.body) : nil
      end

      # Parses the body as JSON.
      #
      # This used to call +body.to_json+, which re-encodes the raw body as a
      # JSON string literal instead of decoding it; the body is now decoded
      # with JSON.parse. Raises JSON::ParserError when the body is not valid
      # JSON.
      def as_json
        self.parsed_body = format_body_present? ? JSON.parse(self.body) : nil
      end

      private

      # True when there is a body and it is not the empty string.
      def format_body_present?
        content = self.body
        content && content != "" ? true : false
      end

    end
  end
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Http
    # GET helpers. The actual transfer is delegated to
    # +retrieve_open_uri_content+, +retrieve_net_http_content+ or
    # +retrieve_curl_content+, which are not defined in this module and must
    # be supplied by the including object.
    #
    # None of these methods modify the +options+ hash passed by the caller;
    # any option they add or remove is applied to a copy.
    module Get

      # Fetches +url+ with the caller's options unchanged.
      def retrieve_raw_content(url, options = {})
        retrieve_content_from_url(url, options)
      end

      # Fetches +url+ with the caller's options unchanged.
      def retrieve_raw_xml(url, options = {})
        retrieve_content_from_url(url, options)
      end

      # Fetches +url+ with :force_encoding => true and :format => :xml.
      def retrieve_parsed_xml(url, options = {})
        retrieve_content_from_url(url, options.merge(:force_encoding => true, :format => :xml))
      end

      # Fetches +url+ with :force_encoding => true and :format => :html.
      def retrieve_parsed_html(url, options = {})
        retrieve_content_from_url(url, options.merge(:force_encoding => true, :format => :html))
      end

      # Same as #retrieve_raw_content_and_fallback_to_proxies, with
      # :format => :html added to the options.
      def retrieve_parsed_html_and_fallback_to_proxies(url, options = {})
        retrieve_raw_content_and_fallback_to_proxies(url, options.merge(:force_encoding => true, :format => :html))
      end

      # Same as #retrieve_raw_content_and_fallback_to_proxies, with
      # :format => :xml added to the options.
      def retrieve_parsed_xml_and_fallback_to_proxies(url, options = {})
        retrieve_raw_content_and_fallback_to_proxies(url, options.merge(:force_encoding => true, :format => :xml))
      end

      # Fetches +url+ once with the given options and, while the result is
      # nil/false, retries with :use_proxy => true.
      #
      # options[:maximum_retrieval_retries] caps the number of proxy retries
      # (default 5) and is not forwarded to the underlying fetch.
      #
      # Returns the first truthy response, or the last falsy one when all
      # attempts fail.
      def retrieve_raw_content_and_fallback_to_proxies(url, options = {})
        # Work on a copy so neither the removed retry setting nor the
        # :use_proxy flag leaks back into the caller's hash.
        request_options = options.dup
        max_retries = request_options.delete(:maximum_retrieval_retries) { 5 }
        request_options[:force_encoding] = true

        response = retrieve_content_from_url(url, request_options)
        retries = 0

        while !response && retries < max_retries
          request_options[:use_proxy] = true
          response = retrieve_content_from_url(url, request_options)
          retries += 1
        end

        response
      end

      # Dispatches on options[:method] (default :net_http) to the matching
      # fetch implementation. Returns nil for any other value.
      def retrieve_content_from_url(url, options = {})
        case options[:method] || :net_http
        when :open_uri then retrieve_open_uri_content(url, options)
        when :net_http then retrieve_net_http_content(url, options)
        when :curl     then retrieve_curl_content(url, options)
        end
      end

    end
  end
end
|
67
|
-
|
@@ -1,32 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Http
    # POST helpers. The actual transfer is delegated to
    # +post_and_retrieve_content_using_net_http+ or
    # +post_and_retrieve_content_using_curl+, which are not defined in this
    # module and must be supplied by the including object.
    #
    # None of these methods modify the +options+ hash passed by the caller.
    module Post

      # Posts +data+ to +url+ with :force_encoding => true and
      # :format => :html.
      def post_and_retrieve_parsed_html(url, data, options = {})
        post_and_retrieve_content(url, data, options.merge(:force_encoding => true, :format => :html))
      end

      # Posts +data+ to +url+ with :force_encoding => true and
      # :format => :xml.
      def post_and_retrieve_parsed_xml(url, data, options = {})
        post_and_retrieve_content(url, data, options.merge(:force_encoding => true, :format => :xml))
      end

      # Dispatches on options[:method] (default :net_http) to the matching
      # POST implementation. Returns nil for any other value.
      #
      # The :response_only option is removed before the options are
      # forwarded; its value is not otherwise used here.
      def post_and_retrieve_content(url, data, options = {})
        # Strip the key from a copy so the caller's hash stays intact.
        request_options = options.dup
        request_options.delete(:response_only)

        case request_options[:method] || :net_http
        when :net_http then post_and_retrieve_content_using_net_http(url, data, request_options)
        when :curl     then post_and_retrieve_content_using_curl(url, data, request_options)
        end
      end

    end
  end
end
|
32
|
-
|
@@ -1,89 +0,0 @@
|
|
1
|
-
module HttpUtilities
  module Proxies
    # Mixin for a proxy model. Including it extends the host class with
    # ClassMethods and includes InstanceMethods. The query helpers run
    # against the top-level ::Proxy class through where/order/limit.
    module ProxyModule

      def self.included(base)
        base.send :extend, ClassMethods
        base.send :include, InstanceMethods
      end

      module ClassMethods
        # Returns proxies that were never checked or were last checked before
        # +date+, with at most +maximum_failed_attempts+ failures, ordered by
        # valid_proxy, failed_attempts and last_checked_at (all ascending)
        # and limited to +limit+ rows. +protocol+ and +proxy_type+ narrow the
        # result unless they are :all.
        def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
          get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
            .where(["(last_checked_at IS NULL OR last_checked_at < ?)", date])
            .where(["failed_attempts <= ?", maximum_failed_attempts])
            .order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC")
            .limit(limit)
        end

        # Returns one randomly ordered proxy that is marked valid and has
        # been checked at least once, or nil when none matches. When
        # +maximum_failed_attempts+ is given, proxies with more failures are
        # excluded. The final lookup runs inside an +uncached+ block.
        def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil)
          proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
          proxies = proxies.where(["valid_proxy = ? AND last_checked_at IS NOT NULL", true])
          proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts]) if maximum_failed_attempts
          proxies = proxies.order(random_order_clause(ActiveRecord::Base.connection.class.name))

          proxy = nil
          uncached { proxy = proxies.limit(1).first }
          proxy
        end

        # Builds the base ::Proxy query, filtered by +protocol+ and
        # +proxy_type+ unless the respective value is nil or :all
        # (compared case-insensitively).
        def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
          proxies = ::Proxy.where(nil)
          proxies = proxies.where(protocol: protocol) if protocol && !protocol.downcase.to_sym.eql?(:all)
          proxies = proxies.where(proxy_type: proxy_type) if proxy_type && !proxy_type.downcase.to_sym.eql?(:all)
          proxies
        end

        # Returns "host:port", prefixed with "http://" when +include_http+ is
        # true and the address does not already start with that scheme.
        def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
          proxy_address = "#{proxy_host}:#{proxy_port}"
          needs_scheme = include_http && !proxy_address.start_with?("http://")
          needs_scheme ? "http://#{proxy_address}" : proxy_address
        end

        # Returns "username:password".
        def format_proxy_credentials(username, password)
          "#{username}:#{password}"
        end

        private

        # Picks the random-ordering SQL for the given connection adapter
        # class name. SQLite and PostgreSQL provide RANDOM() (PostgreSQL has
        # no RAND()); MySQL and any unrecognised adapter use RAND().
        def random_order_clause(adapter_class_name)
          case adapter_class_name
          when "ActiveRecord::ConnectionAdapters::SQLite3Adapter",
               "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
            "RANDOM() DESC"
          else
            "RAND() DESC"
          end
        end
      end

      module InstanceMethods
        # "host:port" for this proxy; see ClassMethods#format_proxy_address.
        def proxy_address(include_http = false)
          ::Proxy.format_proxy_address(self.host, self.port, include_http)
        end

        # "username:password" for this proxy.
        def proxy_credentials
          ::Proxy.format_proxy_credentials(self.username, self.password)
        end

        # Credentials hash for a SOCKS connection:
        # {user:, password:} when both are present, {user:} when only the
        # username is present, and {} otherwise. A nil username or password
        # is treated the same as an empty one.
        def socks_proxy_credentials
          return {} if self.username.to_s.empty?
          return { user: self.username } if self.password.to_s.empty?

          { user: self.username, password: self.password }
        end
      end

    end
  end
end
|