http_utilities 1.0.1
- checksums.yaml +7 -0
- data/Gemfile +22 -0
- data/README +15 -0
- data/Rakefile +87 -0
- data/VERSION +1 -0
- data/http_utilities.gemspec +78 -0
- data/lib/generators/active_record/http_utilities_generator.rb +21 -0
- data/lib/generators/active_record/templates/migration.rb +34 -0
- data/lib/generators/active_record/templates/proxy.rb +3 -0
- data/lib/generators/helpers/file_helper.rb +35 -0
- data/lib/generators/helpers/orm_helpers.rb +15 -0
- data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
- data/lib/generators/templates/http_utilities.rb +2 -0
- data/lib/generators/templates/user_agents.yml +3419 -0
- data/lib/http_utilities/http/adapters/curb.rb +107 -0
- data/lib/http_utilities/http/adapters/net_http.rb +130 -0
- data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
- data/lib/http_utilities/http/client.rb +22 -0
- data/lib/http_utilities/http/cookies.rb +49 -0
- data/lib/http_utilities/http/format.rb +26 -0
- data/lib/http_utilities/http/get.rb +67 -0
- data/lib/http_utilities/http/logger.rb +11 -0
- data/lib/http_utilities/http/mechanize/client.rb +197 -0
- data/lib/http_utilities/http/post.rb +32 -0
- data/lib/http_utilities/http/proxy_support.rb +88 -0
- data/lib/http_utilities/http/request.rb +20 -0
- data/lib/http_utilities/http/response.rb +50 -0
- data/lib/http_utilities/http/url.rb +48 -0
- data/lib/http_utilities/http/user_agent.rb +3380 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
- data/lib/http_utilities/proxies/proxy_module.rb +70 -0
- data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
- data/lib/http_utilities/railtie.rb +11 -0
- data/lib/http_utilities.rb +47 -0
- data/lib/tasks/http_utilities_tasks.rake +19 -0
- data/spec/database.yml.example +10 -0
- data/spec/http_utilities/client_spec.rb +145 -0
- data/spec/http_utilities/mechanize_client_spec.rb +35 -0
- data/spec/http_utilities/proxy_checker_spec.rb +11 -0
- data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
- data/spec/http_utilities/proxy_spec.rb +114 -0
- data/spec/models.rb +6 -0
- data/spec/schema.rb +30 -0
- data/spec/spec_helper.rb +50 -0
- metadata +209 -0
data/lib/http_utilities/http/adapters/curb.rb
@@ -0,0 +1,107 @@
require 'uri'

module HttpUtilities
  module Http
    module Adapters
      module Curb

        def post_and_retrieve_content_using_curl(url, data, options = {})
          request = self.set_curl_options(url, options)
          response = nil

          if (request.interface && data)
            if (data.is_a?(Hash))
              data = data.map { |key, value| Curl::PostField.content(key.to_s, value.to_s) }
            end

            request.interface.http_post(data)
            response = request.interface.body_str
            response = HttpUtilities::Http::Response.new(response, request, options)
          end

          return response
        end

        def retrieve_curl_content(url, options = {})
          request = self.set_curl_options(url, options)
          response = nil

          begin
            success = request.interface.perform
            response = request.interface.body_str
            response = HttpUtilities::Http::Response.new(response, request, options)

          rescue Exception => e
            puts "\n\n#{Time.now}: IMPORTANT! Error occurred while trying to retrieve content from url #{url} and parse it. Error: #{e.message}. Error Class: #{e.class}"
          end

          return response
        end

        def go_to_url(url, options = {})
          success = false

          request = self.set_curl_options(url, options)

          success = request.interface.perform
          success = (success && request.interface.response_code.eql?(200))

          return success
        end

        def set_curl_options(url, options = {})
          options = options.clone()

          request = HttpUtilities::Http::Request.new
          request.set_proxy_options(options)

          accept_cookies      = options.delete(:accept_cookies) { |e| false }
          timeout             = options.delete(:timeout) { |e| 120 }
          maximum_redirects   = options.delete(:maximum_redirects) { |e| 10 }
          disable_auth        = options.delete(:disable_auth) { |e| false }
          accept_content_type = options.delete(:accept_content_type) { |e| false }
          content_type        = options.delete(:content_type) { |e| false }
          cookie_file         = nil

          curl = Curl::Easy.new(url) do |c|
            c.headers ||= {}
            c.headers["User-Agent"] = c.useragent = request.user_agent
            c.headers["Accept"] = accept_content_type if (accept_content_type)
            c.headers["Content-Type"] = content_type if (content_type)
            c.timeout = timeout
            c.ssl_verify_host = false
            c.follow_location = true
            c.max_redirects = maximum_redirects

            if (disable_auth)
              c.http_auth_types = nil
              c.proxy_auth_types = nil
              c.unrestricted_auth = false
            end
          end

          if (accept_cookies)
            FileUtils.mkdir_p File.join(Rails.root, "tmp/cookies")
            identifier = Time.now.to_date.to_s(:db).gsub("-", "_").gsub("\s", "_").gsub(":", "_")
            cookie_file = File.join(Rails.root, "tmp/cookies", "cookies_#{identifier}.txt")

            curl.enable_cookies = true
            curl.cookiejar = cookie_file
            curl.cookiefile = cookie_file
          end

          if (request.proxy[:host] && request.proxy[:port])
            curl.proxy_url = ::Proxy.format_proxy_address(request.proxy[:host], request.proxy[:port], false)
            curl.proxy_type = 5 if (request.proxy[:protocol] && request.proxy[:protocol].downcase.eql?('socks5'))
            curl.proxypwd = ::Proxy.format_proxy_credentials(request.proxy[:username], request.proxy[:password]) if (request.proxy[:username] && request.proxy[:password])
          end

          request.interface = curl

          return request
        end

      end
    end
  end
end
data/lib/http_utilities/http/adapters/net_http.rb
@@ -0,0 +1,130 @@
require 'open-uri'
require 'net/http'
require 'uri'

module HttpUtilities
  module Http
    module Adapters
      module NetHttp

        def post_and_retrieve_content_using_net_http(url, data, options = {})
          uri = URI.parse(url)
          request = set_net_http_options(uri, options)
          response = nil

          opts = options.clone()
          content_type = opts.delete(:content_type) { |e| nil }

          if (request.interface && data)
            data = (data.is_a?(Hash)) ? generate_request_params(data) : data

            request.interface.start do |http|
              headers = {}
              headers["User-Agent"] = request.user_agent
              headers["Content-Type"] = content_type if (content_type)

              http.post(uri.request_uri, data, headers) do |response_data|
                response = response_data
              end

              response = HttpUtilities::Http::Response.new(response, request, options)
            end
          end

          return response
        end

        def set_net_http_options(uri, options = {})
          request = HttpUtilities::Http::Request.new
          request.set_proxy_options(options)
          request.interface = Net::HTTP.new(uri.host, uri.port, request.proxy[:host], request.proxy[:port])

          return request
        end

        def retrieve_net_http_content(url, options = {})
          uri = URI.parse(url)
          request = set_net_http_options(uri, options)
          return perform_net_http_request(request, uri, options)
        end

        def perform_net_http_request(request_or_url, uri = nil, options = {}, redirect_count = 0, max_redirects = 5)
          request = nil
          response = nil

          if (request_or_url)
            opts = (options.is_a?(Hash)) ? options.clone() : {}
            retries = opts.delete(:retries) { |e| 3 }
            force_encoding = opts.delete(:force_encoding) { |e| false }
            cookies = opts.delete(:cookies) { |e| nil }
            timeout = opts.delete(:timeout) { |e| 30 }

            if (request_or_url.is_a?(String))
              uri = URI.parse(request_or_url)
              request = self.set_net_http_options(uri, options)
            else
              request = request_or_url
            end

            if (uri && uri.respond_to?(:request_uri) && uri.request_uri)
              headers = {"User-Agent" => request.user_agent}
              headers = set_cookies(headers, cookies)
              request_uri = uri.request_uri
              http_request = Net::HTTP::Get.new(request_uri, headers)

              begin
                request.interface.start do |http|
                  http.read_timeout = timeout
                  response = http.request(http_request)
                end

              rescue Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::ENETUNREACH, Errno::ECONNRESET, Timeout::Error, Net::HTTPUnauthorized, Net::HTTPForbidden => error
                log(:error, "[HttpUtilities::Http::Client] - Error occurred while trying to fetch url '#{uri.request_uri}'. Error Class: #{error.class.name}. Error Message: #{error.message}")
                retries -= 1
                retry if (retries > 0)
              end
            end
          end

          if (response)
            location = response['location']

            if (!(response.code.to_s =~ /^30\d{1}/i).nil? && location && location.present?)
              location = location.strip.downcase
              redirect_count += 1

              if (redirect_count < max_redirects)
                request.cookies = handle_cookies(response)
                location = "http://#{uri.host}/#{location.gsub(/^\//i, "")}" if (uri && (location =~ /^http(s)?/i).nil?)

                log(:info, "[HttpUtilities::Http::Client] - Redirecting to location: #{location}.")

                options = options.merge(:cookies => request.cookies) if request.cookies
                response = perform_net_http_request(location, uri, options, redirect_count, max_redirects)
              end
            end

            request.cookies = handle_cookies(response)
            response = set_response(response)
            response = HttpUtilities::Http::Response.new(response, request, options)
          end

          return response
        end

        def set_response(response)
          if (response.is_a?(String))
            response = response
          elsif (response.is_a?(Net::HTTPResponse))
            response = response.body
          elsif (response.is_a?(HttpUtilities::Http::Response))
            response = response.body
          end

          return response
        end

      end
    end
  end
end
data/lib/http_utilities/http/adapters/open_uri.rb
@@ -0,0 +1,46 @@
require 'open-uri'
require 'uri'

module HttpUtilities
  module Http
    module Adapters
      module OpenUri

        def retrieve_open_uri_content(url, options = {}, retries = 0, max_retries = 3)
          response = nil

          options = options.clone()
          request = HttpUtilities::Http::Request.new
          request.set_proxy_options(options)

          open_uri_options = {"UserAgent" => request.user_agent}
          open_uri_options[:read_timeout] = options.delete(:timeout) { |e| 120 }

          if (request.proxy[:host] && request.proxy[:port])
            proxy_address = Proxy.format_proxy_address(request.proxy[:host], request.proxy[:port], true)
            open_uri_options[:proxy] = proxy_address

            if (request.proxy[:username] && request.proxy[:password])
              open_uri_options[:proxy_http_basic_authentication] = [proxy_address, request.proxy[:username], request.proxy[:password]]
            end
          end

          connection = nil
          while (connection.nil? && retries < max_retries)
            connection = open(url, open_uri_options)
            retries += 1
          end

          if (connection)
            connection.rewind
            response = connection.readlines.join("\n")
            response = HttpUtilities::Http::Response.new(response, request)
          end

          return response
        end

      end
    end
  end
end
data/lib/http_utilities/http/client.rb
@@ -0,0 +1,22 @@
# -*- encoding : utf-8 -*-
require 'open-uri'
require 'net/http'
require 'uri'
require 'cgi'

module HttpUtilities
  module Http
    class Client
      include HttpUtilities::Http::Cookies
      include HttpUtilities::Http::Url
      include HttpUtilities::Http::Get
      include HttpUtilities::Http::Post
      include HttpUtilities::Http::Logger

      include HttpUtilities::Http::Adapters::NetHttp
      include HttpUtilities::Http::Adapters::OpenUri
      include HttpUtilities::Http::Adapters::Curb
    end
  end
end
data/lib/http_utilities/http/cookies.rb
@@ -0,0 +1,49 @@
module HttpUtilities
  module Http
    module Cookies

      def handle_cookies(response)
        cookies = nil

        if (response && response.is_a?(Net::HTTPResponse))
          cookie_fields = response.get_fields('Set-Cookie')

          if (cookie_fields && cookie_fields.any?)
            cookies = []
            cookie_fields.each do |cookie|
              filtered_cookie = cookie.split('; ').first
              cookies << filtered_cookie
            end
          end

        elsif (response && response.is_a?(HttpUtilities::Http::Response))
          cookies = (response.request && response.request.cookies) ? response.request.cookies : nil
        end

        return cookies
      end

      def format_cookies(cookies)
        cookie_string = ""
        cookies.each {|cookie| cookie_string += "#{cookie}; "}

        return cookie_string
      end

      def set_cookies(headers, cookies)
        if (cookies && cookies.any?)
          cookie_string = (cookies && cookies.is_a?(Array)) ? format_cookies(cookies) : nil

          if (cookie_string)
            cookie_hash = {'cookie' => cookie_string}
            headers = (headers && !headers.empty?) ? headers.merge(cookie_hash) : cookie_hash
          end
        end

        return headers
      end

    end
  end
end
data/lib/http_utilities/http/format.rb
@@ -0,0 +1,26 @@
# -*- encoding : utf-8 -*-
require 'nokogiri'

module HttpUtilities
  module Http
    module Format

      def as_html
        self.parsed_body = (self.body && self.body != "") ? Nokogiri::HTML(self.body, nil, "utf-8") : nil
      end

      def as_xml
        self.parsed_body = (self.body && self.body != "") ? Nokogiri::XML(self.body, nil, "utf-8") : nil
      end

      def as_multi_xml
        self.parsed_body = (self.body && self.body != "") ? MultiXml.parse(self.body) : nil
      end

      def as_json
        self.parsed_body = (self.body && self.body != "") ? self.body.to_json : nil
      end

    end
  end
end
data/lib/http_utilities/http/get.rb
@@ -0,0 +1,67 @@
module HttpUtilities
  module Http
    module Get

      def retrieve_raw_content(url, options = {})
        return retrieve_content_from_url(url, options)
      end

      def retrieve_raw_xml(url, options = {})
        return retrieve_content_from_url(url, options)
      end

      def retrieve_parsed_xml(url, options = {})
        options.merge!({:force_encoding => true, :format => :xml})
        return retrieve_content_from_url(url, options)
      end

      def retrieve_parsed_html(url, options = {})
        options.merge!({:force_encoding => true, :format => :html})
        return retrieve_content_from_url(url, options)
      end

      def retrieve_parsed_html_and_fallback_to_proxies(url, options = {})
        options.merge!({:force_encoding => true, :format => :html})
        return retrieve_raw_content_and_fallback_to_proxies(url, options)
      end

      def retrieve_parsed_xml_and_fallback_to_proxies(url, options = {})
        options.merge!({:force_encoding => true, :format => :xml})
        return retrieve_raw_content_and_fallback_to_proxies(url, options)
      end

      def retrieve_raw_content_and_fallback_to_proxies(url, options = {})
        retries = 0
        max_retries = options.delete(:maximum_retrieval_retries) { |e| 5 }
        options.merge!({:force_encoding => true})

        response = retrieve_content_from_url(url, options)

        while (!response && retries < max_retries) do
          options.merge!({:use_proxy => true})
          response = retrieve_content_from_url(url, options)
          retries += 1
        end

        return response
      end

      def retrieve_content_from_url(url, options = {})
        response = nil
        method = options[:method] || :net_http

        if (method.eql?(:open_uri))
          response = retrieve_open_uri_content(url, options)
        elsif (method.eql?(:net_http))
          response = retrieve_net_http_content(url, options)
        elsif (method.eql?(:curl))
          response = retrieve_curl_content(url, options)
        end

        return response
      end

    end
  end
end
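
Taken together, the Get module and the adapters above give HttpUtilities::Http::Client a single GET entry point, retrieve_content_from_url, which dispatches on the :method option (:net_http by default, or :open_uri / :curl). Below is a minimal usage sketch, not part of the gem's own documentation: the URL is a placeholder, and the parsed_body accessor is assumed to be populated by the Response/Format code shown elsewhere in this diff.

# Hedged sketch, assuming the gem is installed and required.
require 'http_utilities'

client = HttpUtilities::Http::Client.new

# Plain GET via Net::HTTP (the default adapter); URL is a placeholder.
response = client.retrieve_parsed_html("http://www.example.com")

# Same request via curb, retrying through proxies when the direct request fails.
# The proxy fallback assumes proxy records have been seeded (see proxy_seeder.rb).
response = client.retrieve_parsed_html_and_fallback_to_proxies("http://www.example.com", :method => :curl)

puts response.parsed_body if response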
data/lib/http_utilities/http/mechanize/client.rb
@@ -0,0 +1,197 @@
# -*- encoding : utf-8 -*-
require 'open-uri'
require 'net/http'
require 'uri'
require 'cgi'
require 'mechanize'

module HttpUtilities
  module Http
    module Mechanize

      class Client
        attr_accessor :agent, :proxy, :user_agent

        include HttpUtilities::Http::Logger
        include HttpUtilities::Http::Url
        include HttpUtilities::Http::ProxySupport
        include HttpUtilities::Http::UserAgent

        def initialize(options = {})
          init_agent(options)
        end

        def init_agent(options = {})
          verbose = options.fetch(:verbose, false)
          logger = options.fetch(:logger, STDOUT)

          self.agent = ::Mechanize.new
          self.agent.log = ::Logger.new(logger) if (verbose)

          self.set_proxy_options(options)
          self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password]) if (self.proxy[:host] && self.proxy[:port])

          self.set_user_agent
          (self.user_agent) ? self.agent.user_agent = self.user_agent : self.agent.user_agent_alias = 'Mac Safari'

          timeout = options.fetch(:timeout, 300)
          self.agent.open_timeout = self.agent.read_timeout = timeout if (timeout)
        end

        def reset_agent(options = {})
          self.agent, self.proxy, self.user_agent = nil
          init_agent(options)
        end

        def open_url(url, options = {}, retries = 3)
          page = nil

          begin
            page = self.agent.get(url)

          rescue Net::HTTPNotFound, ::Mechanize::ResponseCodeError => error
            log(:error, "[HttpUtilities::Http::Mechanize::Client] - Response Code Error occurred for url #{url}. Error class: #{error.class.name}. Error message: #{error.message}")

            if (retries > 0)
              reset_agent(options)
              retries -= 1
              retry
            end

          rescue Errno::ECONNREFUSED, Errno::ETIMEDOUT, Errno::ECONNRESET, Timeout::Error, Net::HTTPUnauthorized, Net::HTTPForbidden, StandardError => connection_error
            log(:error, "[HttpUtilities::Http::Mechanize::Client] - Error occurred. Error class: #{connection_error.class.name}. Message: #{connection_error.message}")

            if (retries > 0)
              reset_agent(options)
              retries -= 1
              retry
            end
          end

          return page
        end

        def get_page(url_or_page, options = {})
          page = nil

          if (url_or_page.is_a?(String))
            page = open_url(url_or_page, options)
          else
            page = url_or_page
          end

          page = (page && page.is_a?(::Mechanize::Page)) ? page : nil # Occasionally proxies will yield Mechanize::File instead of a proper page

          return page
        end

        def get_form(url_or_page, form_identifier = {}, options = {})
          form = nil
          index = form_identifier.delete(:index) { |el| 0 }
          page = (url_or_page.is_a?(String)) ? get_page(url_or_page, options) : url_or_page

          if (page)
            if (form_identifier.empty?)
              form = page.forms[index]
            else
              forms = page.forms_with(form_identifier)
              form = (forms && forms.any?) ? forms[index] : nil
            end
          end

          return form
        end

        def set_form_and_submit(url_or_page, form_identifier = {}, submit_identifier = :first, fields = {}, options = {}, retries = 3)
          should_reset_radio_buttons = options.fetch(:should_reset_radio_buttons, false)
          page = get_page(url_or_page, options)
          form = page ? get_form(page, form_identifier) : nil
          response_page = nil

          if (form)
            form.action = "#{url_or_page}#{form.action}" if (url_or_page.is_a?(String) && form.action.starts_with?("#"))
            form = reset_radio_buttons(form) if (should_reset_radio_buttons)
            form = set_form_fields(form, fields)
            button = (submit_identifier.nil? || submit_identifier.eql?(:first)) ? form.buttons.first : form.button_with(submit_identifier)

            begin
              response_page = form.submit(button)
            rescue Exception => e
              log(:error, "[HttpUtilities::Http::Mechanize::Client] - Failed to submit form. Error: #{e.class.name} - #{e.message}.")
            end

          elsif (!form && retries > 0)
            log(:info, "[HttpUtilities::Http::Mechanize::Client] - Couldn't find page or form with identifier #{form_identifier.inspect}")
            retries -= 1
            reset_agent(options)
            set_form_and_submit(url_or_page, form_identifier, submit_identifier, fields, options, retries)
          end

          return response_page
        end

        def reset_radio_buttons(form)
          radio_buttons = form.radiobuttons

          radio_buttons.each do |radio_button|
            radio_button.checked = false
          end if (form && radio_buttons && radio_buttons.any?)

          return form
        end

        def set_form_fields(form, fields)
          if (form && fields && !fields.empty?)
            fields.each do |key, value|
              form = set_form_field(form, key, value)
            end
          end

          return form
        end

        def set_form_field(form, key, value)
          type = value.fetch(:type, :input)
          identifier = value.fetch(:identifier, :name)

          if (type.eql?(:input))
            log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting text field #{key} to value #{value[:value]}.")
            form.has_field?(key.to_s) ? form.field_with(identifier => key.to_s).value = value[:value].to_s : set_form_fields(form, value[:fallbacks])

          elsif (type.eql?(:checkbox))
            log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting checkbox #{key} to checked: #{value[:checked]}.")
            form.checkbox_with(identifier => key.to_s).checked = value[:checked]

          elsif (type.eql?(:radiobutton))
            log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting radio button #{key} to checked: #{value[:checked]}.")
            form.radiobutton_with(identifier => key.to_s).checked = value[:checked]

          elsif (type.eql?(:select))
            log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting select/dropdown #{key} to value: #{value[:value]}.")
            form.field_with(identifier => key.to_s).value = value[:value].to_s

          elsif (type.eql?(:file_upload))
            log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting file upload #{key} to value #{value[:value]}.")
            form.file_upload_with(identifier => key.to_s).file_name = value[:value].to_s
          end

          return form
        end

        def get_parser(page)
          parser = nil

          if (page.is_a?(::Mechanize::Page))
            parser = page.parser
          elsif (page.is_a?(::Mechanize::File))
            parser = Nokogiri::HTML(page.body, nil, "utf-8")
          end

          return parser
        end

      end

    end
  end
end
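
The Mechanize client wraps form discovery and submission behind set_form_and_submit. A hedged usage sketch follows; the URL, the form's :name, and the field names are hypothetical, while the field-hash shape (:type, :value, :checked) mirrors set_form_field above.

# Hedged sketch; endpoint, form name, and field keys are placeholders.
client = HttpUtilities::Http::Mechanize::Client.new(:timeout => 60)

fields = {
  :username => {:type => :input, :value => "jane"},
  :remember => {:type => :checkbox, :checked => true}
}

result_page = client.set_form_and_submit("http://www.example.com/login", {:name => "login_form"}, :first, fields)
parser = client.get_parser(result_page) # Nokogiri document, or nil if the submit failed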
data/lib/http_utilities/http/post.rb
@@ -0,0 +1,32 @@
module HttpUtilities
  module Http
    module Post

      def post_and_retrieve_parsed_html(url, data, options = {})
        options.merge!({:force_encoding => true, :format => :html})
        return post_and_retrieve_content(url, data, options)
      end

      def post_and_retrieve_parsed_xml(url, data, options = {})
        options.merge!({:force_encoding => true, :format => :xml})
        return post_and_retrieve_content(url, data, options)
      end

      def post_and_retrieve_content(url, data, options = {})
        response = nil
        method = options[:method] || :net_http
        response_only = options.delete(:response_only) { |e| true }

        if (method.eql?(:net_http))
          response = post_and_retrieve_content_using_net_http(url, data, options)
        elsif (method.eql?(:curl))
          response = post_and_retrieve_content_using_curl(url, data, options)
        end

        return response
      end

    end
  end
end
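
The Post module mirrors Get: post_and_retrieve_content picks an adapter from the :method option and hands the payload hash to it. A short hedged sketch, with a placeholder endpoint and payload; with :method => :net_http the hash is serialized by generate_request_params (presumably provided by the Url module, which is not shown in this diff), and with :method => :curl it is converted into Curl::PostField entries, which requires the curb gem.

# Hedged sketch; the endpoint and payload are placeholders.
client = HttpUtilities::Http::Client.new

payload = {:query => "ruby http clients", :page => 1}
response = client.post_and_retrieve_parsed_html("http://www.example.com/search", payload, :method => :curl)

puts response.body if response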