http_utilities 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +22 -0
- data/README +15 -0
- data/Rakefile +87 -0
- data/VERSION +1 -0
- data/http_utilities.gemspec +78 -0
- data/lib/generators/active_record/http_utilities_generator.rb +21 -0
- data/lib/generators/active_record/templates/migration.rb +34 -0
- data/lib/generators/active_record/templates/proxy.rb +3 -0
- data/lib/generators/helpers/file_helper.rb +35 -0
- data/lib/generators/helpers/orm_helpers.rb +15 -0
- data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
- data/lib/generators/templates/http_utilities.rb +2 -0
- data/lib/generators/templates/user_agents.yml +3419 -0
- data/lib/http_utilities/http/adapters/curb.rb +107 -0
- data/lib/http_utilities/http/adapters/net_http.rb +130 -0
- data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
- data/lib/http_utilities/http/client.rb +22 -0
- data/lib/http_utilities/http/cookies.rb +49 -0
- data/lib/http_utilities/http/format.rb +26 -0
- data/lib/http_utilities/http/get.rb +67 -0
- data/lib/http_utilities/http/logger.rb +11 -0
- data/lib/http_utilities/http/mechanize/client.rb +197 -0
- data/lib/http_utilities/http/post.rb +32 -0
- data/lib/http_utilities/http/proxy_support.rb +88 -0
- data/lib/http_utilities/http/request.rb +20 -0
- data/lib/http_utilities/http/response.rb +50 -0
- data/lib/http_utilities/http/url.rb +48 -0
- data/lib/http_utilities/http/user_agent.rb +3380 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
- data/lib/http_utilities/proxies/proxy_module.rb +70 -0
- data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
- data/lib/http_utilities/railtie.rb +11 -0
- data/lib/http_utilities.rb +47 -0
- data/lib/tasks/http_utilities_tasks.rake +19 -0
- data/spec/database.yml.example +10 -0
- data/spec/http_utilities/client_spec.rb +145 -0
- data/spec/http_utilities/mechanize_client_spec.rb +35 -0
- data/spec/http_utilities/proxy_checker_spec.rb +11 -0
- data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
- data/spec/http_utilities/proxy_spec.rb +114 -0
- data/spec/models.rb +6 -0
- data/spec/schema.rb +30 -0
- data/spec/spec_helper.rb +50 -0
- metadata +209 -0
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module HttpUtilities
|
4
|
+
module Http
|
5
|
+
module Adapters
|
6
|
+
module Curb
|
7
|
+
|
8
|
+
# POSTs +data+ to +url+ through a Curl handle and wraps the reply.
#
# url     - target URL handed to set_curl_options.
# data    - request payload; a Hash is turned into Curl::PostField entries,
#           anything else is passed to http_post untouched.
# options - forwarded to set_curl_options and to the Response wrapper.
#
# Returns an HttpUtilities::Http::Response, or nil when there is no Curl
# handle or no data to send.
def post_and_retrieve_content_using_curl(url, data, options = {})
  request = self.set_curl_options(url, options)
  return nil unless request.interface && data

  payload = data
  if payload.is_a?(Hash)
    payload = payload.map { |name, content| Curl::PostField.content(name.to_s, content.to_s) }
  end

  request.interface.http_post(payload)
  raw_body = request.interface.body_str
  HttpUtilities::Http::Response.new(raw_body, request, options)
end
|
24
|
+
|
25
|
+
# Performs a GET through Curl and wraps the body in a Response.
#
# url     - URL to fetch.
# options - forwarded to set_curl_options and to the Response wrapper.
#
# Returns an HttpUtilities::Http::Response, or nil when the transfer or the
# wrapping raised an ordinary error (the error is reported on stdout).
def retrieve_curl_content(url, options = {})
  request = self.set_curl_options(url, options)
  response = nil

  begin
    request.interface.perform
    response = HttpUtilities::Http::Response.new(request.interface.body_str, request, options)
  rescue StandardError => e
    # Only StandardError is swallowed: rescuing Exception would also trap
    # Interrupt, SystemExit and NoMemoryError and make the process unkillable
    # while a transfer is failing.
    puts "\n\n#{Time.now}: IMPORTANT! Error occurred while trying to retrieve content from url #{url} and parse it. Error: #{e.message}. Error Class: #{e.class}"
  end

  response
end
|
40
|
+
|
41
|
+
# Visits +url+ with Curl and reports whether it answered with HTTP 200.
#
# url     - URL to request.
# options - forwarded to set_curl_options.
#
# Returns a truthy value only when the transfer succeeded and the response
# code is exactly 200.
def go_to_url(url, options = {})
  request = self.set_curl_options(url, options)
  handle = request.interface

  success = handle.perform
  # The response code lives on the Curl handle stored in the request; the
  # previous code read it from an undefined local named `curl`.
  success && handle.response_code.eql?(200)
end
|
51
|
+
|
52
|
+
# Builds a Request whose interface is a configured Curl::Easy handle.
#
# url     - URL the handle is created for.
# options - Hash; the caller's hash is cloned, never modified. Recognised keys:
#           :accept_cookies (false), :timeout (120), :maximum_redirects (10),
#           :disable_auth (false), :accept_content_type (false),
#           :content_type (false). The full clone is also handed to
#           Request#set_proxy_options before those keys are removed.
#
# Returns the HttpUtilities::Http::Request with +interface+ set to the handle.
def set_curl_options(url, options = {})
  options = options.clone

  request = HttpUtilities::Http::Request.new
  request.set_proxy_options(options)

  accept_cookies      = options.delete(:accept_cookies) { false }
  timeout             = options.delete(:timeout) { 120 }
  maximum_redirects   = options.delete(:maximum_redirects) { 10 }
  disable_auth        = options.delete(:disable_auth) { false }
  accept_content_type = options.delete(:accept_content_type) { false }
  content_type        = options.delete(:content_type) { false }

  curl = Curl::Easy.new(url) do |c|
    c.headers ||= {}
    c.headers["User-Agent"]   = c.useragent = request.user_agent
    c.headers["Accept"]       = accept_content_type if accept_content_type
    c.headers["Content-Type"] = content_type if content_type
    c.timeout         = timeout
    # NOTE(review): TLS host verification is switched off here; confirm this
    # is intentional before using the adapter against untrusted networks.
    c.ssl_verify_host = false
    c.follow_location = true
    c.max_redirects   = maximum_redirects

    if disable_auth
      c.http_auth_types   = nil
      c.proxy_auth_types  = nil
      c.unrestricted_auth = false
    end
  end

  if accept_cookies
    cookie_directory = File.join(Rails.root, "tmp/cookies")
    FileUtils.mkdir_p cookie_directory

    # One cookie jar per calendar day, e.g. cookies_2024_01_31.txt. strftime
    # yields the same text as the former to_date.to_s(:db) + gsub chain but
    # does not depend on ActiveSupport's to_s(:format) override.
    identifier  = Time.now.strftime("%Y_%m_%d")
    cookie_file = File.join(cookie_directory, "cookies_#{identifier}.txt")

    curl.enable_cookies = true
    curl.cookiejar      = cookie_file
    curl.cookiefile     = cookie_file
  end

  if request.proxy[:host] && request.proxy[:port]
    curl.proxy_url  = ::Proxy.format_proxy_address(request.proxy[:host], request.proxy[:port], false)
    curl.proxy_type = 5 if request.proxy[:protocol] && request.proxy[:protocol].downcase.eql?('socks5')
    if request.proxy[:username] && request.proxy[:password]
      curl.proxypwd = ::Proxy.format_proxy_credentials(request.proxy[:username], request.proxy[:password])
    end
  end

  request.interface = curl

  request
end
|
103
|
+
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module HttpUtilities
|
6
|
+
module Http
|
7
|
+
module Adapters
|
8
|
+
module NetHttp
|
9
|
+
|
10
|
+
# POSTs +data+ to +url+ with Net::HTTP and wraps the reply body.
#
# url     - String URL; parsed with URI.parse.
# data    - request body; a Hash is serialised with generate_request_params.
# options - forwarded to set_net_http_options and the Response wrapper;
#           :content_type, when present, becomes the Content-Type header.
#
# Returns an HttpUtilities::Http::Response, or nil when there is no
# Net::HTTP interface or no data to send.
def post_and_retrieve_content_using_net_http(url, data, options = {})
  uri = URI.parse(url)
  request = set_net_http_options(uri, options)
  response = nil
  content_type = options.fetch(:content_type, nil)

  if request.interface && data
    data = generate_request_params(data) if data.is_a?(Hash)

    headers = {"User-Agent" => request.user_agent}
    headers["Content-Type"] = content_type if content_type

    request.interface.start do |http|
      # Read the body from the returned response object. The block form of
      # Net::HTTP#post yields the body fragment by fragment, so keeping the
      # block argument retained only the final fragment of the reply.
      http_response = http.post(uri.request_uri, data, headers)
      response = HttpUtilities::Http::Response.new(http_response.body, request, options)
    end
  end

  response
end
|
36
|
+
|
37
|
+
# Builds a Request whose interface is a Net::HTTP client for +uri+.
#
# uri     - parsed URI (host, port and scheme are read from it).
# options - forwarded to Request#set_proxy_options.
#
# Returns the HttpUtilities::Http::Request with +interface+ populated.
def set_net_http_options(uri, options = {})
  request = HttpUtilities::Http::Request.new
  request.set_proxy_options(options)

  proxy = request.proxy
  # Proxy credentials are forwarded as well so authenticated proxies behave
  # the same way here as in the Curl and open-uri adapters.
  interface = Net::HTTP.new(uri.host, uri.port, proxy[:host], proxy[:port], proxy[:username], proxy[:password])

  # Net::HTTP speaks plain HTTP unless told otherwise, even on port 443, so
  # https URLs need TLS switched on explicitly.
  interface.use_ssl = true if uri.scheme.to_s.downcase == "https"

  request.interface = interface

  request
end
|
44
|
+
|
45
|
+
# Fetches +url+ with the Net::HTTP adapter.
#
# The URL is parsed once; the resulting URI is used both to configure the
# request and to perform it. Returns whatever perform_net_http_request returns.
def retrieve_net_http_content(url, options = {})
  target = URI.parse(url)
  prepared_request = set_net_http_options(target, options)

  perform_net_http_request(prepared_request, target, options)
end
|
50
|
+
|
51
|
+
# Performs a GET with Net::HTTP, retrying on connection errors and following
# redirects.
#
# request_or_url - a prepared Request, or a String URL (a Request is then
#                  built for it with set_net_http_options).
# uri            - parsed URI matching the Request; re-derived when a String
#                  URL is given.
# options        - Hash; reads :retries (3), :cookies (nil) and :timeout (30).
#                  The hash is not modified.
# redirect_count - number of redirects already followed.
# max_redirects  - redirects are followed while the count stays below this.
#
# Returns an HttpUtilities::Http::Response, or nil when nothing was received.
def perform_net_http_request(request_or_url, uri = nil, options = {}, redirect_count = 0, max_redirects = 5)
  request = nil
  response = nil

  if request_or_url
    opts = options.is_a?(Hash) ? options : {}
    retries = opts.fetch(:retries, 3)
    cookies = opts.fetch(:cookies, nil)
    timeout = opts.fetch(:timeout, 30)

    if request_or_url.is_a?(String)
      uri = URI.parse(request_or_url)
      request = self.set_net_http_options(uri, options)
    else
      request = request_or_url
    end

    if uri && uri.respond_to?(:request_uri) && uri.request_uri
      headers = set_cookies({"User-Agent" => request.user_agent}, cookies)
      http_request = Net::HTTP::Get.new(uri.request_uri, headers)

      begin
        request.interface.start do |http|
          http.read_timeout = timeout
          response = http.request(http_request)
        end
      rescue Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::ENETUNREACH, Errno::ECONNRESET, Timeout::Error => error
        # Net::HTTPUnauthorized / Net::HTTPForbidden used to be listed here too;
        # they are response classes, never raised, so they could not match.
        log(:error, "[HttpUtilities::Http::Client] - Error occurred while trying to fetch url '#{uri.request_uri}'. Error Class: #{error.class.name}. Error Message: #{error.message}")
        retries -= 1
        retry if retries > 0
      end
    end
  end

  if response
    location = response['location'].to_s.strip

    if response.code.to_s =~ /^30\d{1}/ && !location.empty?
      redirect_count += 1

      if redirect_count < max_redirects
        request.cookies = handle_cookies(response)

        # Resolve relative targets against the URL just requested so scheme,
        # port and path are preserved. The target is deliberately not
        # lower-cased: URL paths and query strings are case-sensitive.
        location = URI.join(uri.to_s, location).to_s if uri && location !~ /^http(s)?/i

        log(:info, "[HttpUtilities::Http::Client] - Redirecting to location: #{location}.")

        options = options.merge(:cookies => request.cookies) if request.cookies
        response = perform_net_http_request(location, uri, options, redirect_count, max_redirects)
      end
    end

    # After a followed redirect +response+ is already a wrapped Response (or
    # nil); handle_cookies and set_response accept either form.
    request.cookies = handle_cookies(response)
    response = set_response(response)
    response = HttpUtilities::Http::Response.new(response, request, options)
  end

  response
end
|
114
|
+
|
115
|
+
# Reduces a response-like value to its body.
#
# Strings come back unchanged; Net::HTTPResponse and
# HttpUtilities::Http::Response objects are replaced by their +body+; any
# other value (including nil) is returned as given.
def set_response(response)
  case response
  when String
    response
  when Net::HTTPResponse
    response.body
  when HttpUtilities::Http::Response
    response.body
  else
    response
  end
end
|
126
|
+
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module HttpUtilities
|
5
|
+
module Http
|
6
|
+
module Adapters
|
7
|
+
module OpenUri
|
8
|
+
|
9
|
+
# Fetches +url+ with open-uri and wraps the body in a Response.
#
# url         - URL to open.
# options     - Hash (cloned, never modified); :timeout (default 120) becomes
#               the read timeout, the rest goes to Request#set_proxy_options.
# retries     - attempts already made.
# max_retries - upper bound for attempts. NOTE(review): open-uri raises on
#               failure instead of returning nil, and errors are not rescued
#               here, so in practice only one attempt is ever made.
#
# Returns an HttpUtilities::Http::Response, or nil when nothing was opened.
def retrieve_open_uri_content(url, options = {}, retries = 0, max_retries = 3)
  response = nil

  options = options.clone
  request = HttpUtilities::Http::Request.new
  request.set_proxy_options(options)

  # open-uri sends String keys as request headers verbatim, so the header
  # must be spelled "User-Agent"; "UserAgent" is not a header servers read.
  open_uri_options = {"User-Agent" => request.user_agent}
  open_uri_options[:read_timeout] = options.delete(:timeout) { 120 }

  if request.proxy[:host] && request.proxy[:port]
    proxy_address = Proxy.format_proxy_address(request.proxy[:host], request.proxy[:port], true)

    # :proxy and :proxy_http_basic_authentication are mutually exclusive;
    # open-uri raises ArgumentError when both are supplied.
    if request.proxy[:username] && request.proxy[:password]
      open_uri_options[:proxy_http_basic_authentication] = [proxy_address, request.proxy[:username], request.proxy[:password]]
    else
      open_uri_options[:proxy] = proxy_address
    end
  end

  connection = nil
  while connection.nil? && retries < max_retries
    # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is the
    # replacement. Fall back to Kernel#open on Rubies that predate it.
    connection = URI.respond_to?(:open) ? URI.open(url, open_uri_options) : open(url, open_uri_options)
    retries += 1
  end

  if connection
    begin
      connection.rewind
      # read returns the body exactly as received; readlines.join("\n")
      # inserted an extra newline after every line.
      response = HttpUtilities::Http::Response.new(connection.read, request)
    ensure
      # Larger downloads are backed by a Tempfile; release it promptly.
      connection.close
    end
  end

  response
end
|
42
|
+
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'open-uri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
require 'cgi'
|
6
|
+
|
7
|
+
module HttpUtilities
|
8
|
+
module Http
|
9
|
+
# HTTP client whose behaviour is assembled entirely from mixins: cookie, URL,
# GET/POST and logging helpers first, then the three transport adapters.
# The modules are included in the order listed.
class Client
  [
    HttpUtilities::Http::Cookies,
    HttpUtilities::Http::Url,
    HttpUtilities::Http::Get,
    HttpUtilities::Http::Post,
    HttpUtilities::Http::Logger,
    HttpUtilities::Http::Adapters::NetHttp,
    HttpUtilities::Http::Adapters::OpenUri,
    HttpUtilities::Http::Adapters::Curb
  ].each { |mixin| include mixin }
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module HttpUtilities
|
2
|
+
module Http
|
3
|
+
module Cookies
|
4
|
+
|
5
|
+
# Extracts cookies from a response.
#
# For a Net::HTTPResponse every Set-Cookie header is reduced to its leading
# "name=value" pair (the text before the first "; "). For an
# HttpUtilities::Http::Response the cookies stored on its request are reused.
#
# Returns an Array of cookie strings, or nil when there are none or the
# response is of another kind.
def handle_cookies(response)
  return nil unless response

  if response.is_a?(Net::HTTPResponse)
    set_cookie_headers = response.get_fields('Set-Cookie')
    return nil unless set_cookie_headers && set_cookie_headers.any?

    set_cookie_headers.map { |header| header.split('; ').first }
  elsif response.is_a?(HttpUtilities::Http::Response)
    if response.request && response.request.cookies
      response.request.cookies
    else
      nil
    end
  end
end
|
25
|
+
|
26
|
+
# Joins cookies into a single Cookie header value.
#
# Every entry is followed by "; ", so a non-empty result ends with a
# trailing "; ". An empty collection yields an empty string.
def format_cookies(cookies)
  cookies.inject("") { |header_value, cookie| header_value + "#{cookie}; " }
end
|
32
|
+
|
33
|
+
# Adds a 'cookie' entry to +headers+ when there are cookies to send.
#
# headers - Hash of request headers (may be nil or empty).
# cookies - Array of cookie strings; any other non-empty collection is ignored.
#
# Returns +headers+ untouched when nothing is added; otherwise a new Hash
# with the cookie entry merged in (or just the cookie entry when +headers+
# is nil or empty).
def set_cookies(headers, cookies)
  return headers unless cookies && cookies.any?
  return headers unless cookies.is_a?(Array)

  cookie_string = format_cookies(cookies)
  return headers unless cookie_string

  cookie_header = {'cookie' => cookie_string}
  if headers && !headers.empty?
    headers.merge(cookie_header)
  else
    cookie_header
  end
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module HttpUtilities
|
5
|
+
module Http
|
6
|
+
module Format
|
7
|
+
|
8
|
+
# Parses the body as an HTML document and stores it in +parsed_body+.
# A nil or empty body stores (and returns) nil instead.
def as_html
  document = nil
  document = Nokogiri::HTML(self.body, nil, "utf-8") if self.body && self.body != ""

  self.parsed_body = document
end
|
11
|
+
|
12
|
+
# Parses the body as an XML document and stores it in +parsed_body+.
# A nil or empty body stores (and returns) nil instead.
def as_xml
  document = nil
  document = Nokogiri::XML(self.body, nil, "utf-8") if self.body && self.body != ""

  self.parsed_body = document
end
|
15
|
+
|
16
|
+
# Parses the body with MultiXml and stores the result in +parsed_body+.
# A nil or empty body stores (and returns) nil instead.
def as_multi_xml
  parsed = nil
  parsed = MultiXml.parse(self.body) if self.body && self.body != ""

  self.parsed_body = parsed
end
|
19
|
+
|
20
|
+
# Stores the body's +to_json+ representation in +parsed_body+.
# A nil or empty body stores (and returns) nil instead.
#
# NOTE(review): to_json serialises the body rather than parsing it; confirm
# whether callers expect a decoded structure here.
def as_json
  encoded = nil
  encoded = self.body.to_json if self.body && self.body != ""

  self.parsed_body = encoded
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module HttpUtilities
|
2
|
+
module Http
|
3
|
+
module Get
|
4
|
+
|
5
|
+
# Fetches +url+ and returns the response without requesting any parsing.
# Thin alias for retrieve_content_from_url; +options+ is passed through as is.
def retrieve_raw_content(url, options = {})
  retrieve_content_from_url(url, options)
end
|
8
|
+
|
9
|
+
# Fetches +url+ expecting XML but leaves the body unparsed.
# Thin alias for retrieve_content_from_url; +options+ is passed through as is.
def retrieve_raw_xml(url, options = {})
  retrieve_content_from_url(url, options)
end
|
12
|
+
|
13
|
+
# Fetches +url+ and asks for the body to be parsed as XML.
#
# Adds :force_encoding => true and :format => :xml to a copy of +options+;
# the caller's hash is left untouched so it can be reused for other calls.
# Returns whatever retrieve_content_from_url returns.
def retrieve_parsed_xml(url, options = {})
  retrieve_content_from_url(url, options.merge(:force_encoding => true, :format => :xml))
end
|
17
|
+
|
18
|
+
# Fetches +url+ and asks for the body to be parsed as HTML.
#
# Adds :force_encoding => true and :format => :html to a copy of +options+;
# the caller's hash is left untouched so it can be reused for other calls.
# Returns whatever retrieve_content_from_url returns.
def retrieve_parsed_html(url, options = {})
  retrieve_content_from_url(url, options.merge(:force_encoding => true, :format => :html))
end
|
22
|
+
|
23
|
+
# Fetches +url+ as parsed HTML, retrying through proxies when the direct
# attempt yields nothing.
#
# Adds :force_encoding => true and :format => :html to a copy of +options+;
# the caller's hash is left untouched. Returns whatever
# retrieve_raw_content_and_fallback_to_proxies returns.
def retrieve_parsed_html_and_fallback_to_proxies(url, options = {})
  retrieve_raw_content_and_fallback_to_proxies(url, options.merge(:force_encoding => true, :format => :html))
end
|
27
|
+
|
28
|
+
# Fetches +url+ as parsed XML, retrying through proxies when the direct
# attempt yields nothing.
#
# Adds :force_encoding => true and :format => :xml to a copy of +options+;
# the caller's hash is left untouched. Returns whatever
# retrieve_raw_content_and_fallback_to_proxies returns.
def retrieve_parsed_xml_and_fallback_to_proxies(url, options = {})
  retrieve_raw_content_and_fallback_to_proxies(url, options.merge(:force_encoding => true, :format => :xml))
end
|
32
|
+
|
33
|
+
# Fetches +url+ directly and, while that yields nothing, retries with
# :use_proxy => true.
#
# options - Hash; :maximum_retrieval_retries (default 5) caps the number of
#           proxied attempts and is not forwarded. :force_encoding => true is
#           always added. All changes are made on a private copy, so the
#           caller's hash is never modified.
#
# Returns the first truthy response, or the last falsy one when every
# attempt fails.
def retrieve_raw_content_and_fallback_to_proxies(url, options = {})
  options = options.dup
  max_retries = options.delete(:maximum_retrieval_retries) { 5 }
  options[:force_encoding] = true

  # First attempt honours whatever proxy setting the caller supplied.
  response = retrieve_content_from_url(url, options)

  retries = 0
  while !response && retries < max_retries
    options[:use_proxy] = true
    response = retrieve_content_from_url(url, options)
    retries += 1
  end

  response
end
|
48
|
+
|
49
|
+
# Dispatches a GET to the adapter named by options[:method].
#
# Supported values are :open_uri, :net_http (the default when the key is
# missing or falsy) and :curl. Any other value performs no request.
#
# Returns the adapter's response, or nil for an unknown method.
def retrieve_content_from_url(url, options = {})
  adapter = options[:method] || :net_http

  case adapter
  when :open_uri then retrieve_open_uri_content(url, options)
  when :net_http then retrieve_net_http_content(url, options)
  when :curl     then retrieve_curl_content(url, options)
  end
end
|
63
|
+
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'open-uri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
require 'cgi'
|
6
|
+
require 'mechanize'
|
7
|
+
|
8
|
+
module HttpUtilities
|
9
|
+
module Http
|
10
|
+
module Mechanize
|
11
|
+
|
12
|
+
class Client
|
13
|
+
attr_accessor :agent, :proxy, :user_agent
|
14
|
+
|
15
|
+
include HttpUtilities::Http::Logger
|
16
|
+
include HttpUtilities::Http::Url
|
17
|
+
include HttpUtilities::Http::ProxySupport
|
18
|
+
include HttpUtilities::Http::UserAgent
|
19
|
+
|
20
|
+
# Creates a client with a ready-to-use Mechanize agent.
#
# options - passed to init_agent unchanged.
def initialize(options = {})
  init_agent(options)
end
|
23
|
+
|
24
|
+
# Creates and configures the Mechanize agent.
#
# options - Hash; reads :verbose (false), :logger (STDOUT) and :timeout (300).
#           The whole hash is also handed to set_proxy_options.
#
# Steps: optional logging, proxy, user agent (falling back to the
# 'Mac Safari' alias when none was chosen), then open/read timeouts.
def init_agent(options = {})
  self.agent = ::Mechanize.new
  self.agent.log = ::Logger.new(options.fetch(:logger, STDOUT)) if options.fetch(:verbose, false)

  self.set_proxy_options(options)
  if self.proxy[:host] && self.proxy[:port]
    self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password])
  end

  self.set_user_agent
  if self.user_agent
    self.agent.user_agent = self.user_agent
  else
    self.agent.user_agent_alias = 'Mac Safari'
  end

  timeout = options.fetch(:timeout, 300)
  self.agent.open_timeout = self.agent.read_timeout = timeout if timeout
end
|
40
|
+
|
41
|
+
# Discards the current agent, proxy and user agent, then builds a fresh
# agent from +options+ via init_agent.
def reset_agent(options = {})
  self.agent = nil
  self.proxy = nil
  self.user_agent = nil

  init_agent(options)
end
|
45
|
+
|
46
|
+
# Opens +url+ with the Mechanize agent.
#
# url     - URL to fetch.
# options - used to rebuild the agent (reset_agent) before each retry.
# retries - how many more attempts are allowed after a failure.
#
# Every failure is logged. While retries remain, the agent is reset and the
# request is repeated. Returns the fetched page, or nil once the attempts
# are exhausted.
def open_url(url, options = {}, retries = 3)
  self.agent.get(url)
rescue Net::HTTPNotFound, ::Mechanize::ResponseCodeError => error
  log(:error, "[HttpUtilities::Http::Mechanize::Client] - Response Code Error occurred for url #{url}. Error class: #{error.class.name}. Error message: #{error.message}")

  if retries > 0
    reset_agent(options)
    retries -= 1
    retry
  end

  nil
rescue Errno::ECONNREFUSED, Errno::ETIMEDOUT, Errno::ECONNRESET, Timeout::Error, Net::HTTPUnauthorized, Net::HTTPForbidden, StandardError => connection_error
  log(:error, "[HttpUtilities::Http::Mechanize::Client] - Error occurred. Error class: #{connection_error.class.name}. Message: #{connection_error.message}")

  if retries > 0
    reset_agent(options)
    retries -= 1
    retry
  end

  nil
end
|
73
|
+
|
74
|
+
# Resolves +url_or_page+ to a Mechanize page.
#
# A String is fetched with open_url; anything else is taken as is. Only a
# ::Mechanize::Page is accepted: proxies occasionally yield a
# Mechanize::File instead of a proper page, and that is reported as nil.
def get_page(url_or_page, options = {})
  candidate = url_or_page.is_a?(String) ? open_url(url_or_page, options) : url_or_page

  candidate.is_a?(::Mechanize::Page) ? candidate : nil
end
|
87
|
+
|
88
|
+
# Finds a form on a page.
#
# url_or_page     - a URL String (fetched through get_page) or a page object.
# form_identifier - Hash of criteria for forms_with; the optional :index key
#                   (default 0) selects which match to return. The hash is
#                   read from a copy and never modified, so callers can reuse
#                   it, for example when retrying.
# options         - forwarded to get_page.
#
# Returns the selected form, or nil when the page or form is missing.
def get_form(url_or_page, form_identifier = {}, options = {})
  criteria = form_identifier.dup
  index = criteria.delete(:index) { 0 }
  page = url_or_page.is_a?(String) ? get_page(url_or_page, options) : url_or_page

  return nil unless page
  return page.forms[index] if criteria.empty?

  forms = page.forms_with(criteria)
  (forms && forms.any?) ? forms[index] : nil
end
|
104
|
+
|
105
|
+
# Locates a form, fills it in and submits it.
#
# url_or_page       - URL String or page object containing the form.
# form_identifier   - criteria passed to get_form (may include :index).
# submit_identifier - :first / nil for the first button, otherwise criteria
#                     for button_with.
# fields            - field definitions passed to set_form_fields.
# options           - :should_reset_radio_buttons (false) unchecks all radio
#                     buttons first; the hash is also used to fetch the page
#                     and to reset the agent before a retry.
# retries           - remaining attempts when the page or form is not found.
#
# Returns the page produced by the submission, or nil when the form could
# not be found within the allowed retries or the submission raised.
def set_form_and_submit(url_or_page, form_identifier = {}, submit_identifier = :first, fields = {}, options = {}, retries = 3)
  should_reset_radio_buttons = options.fetch(:should_reset_radio_buttons, false)
  page = get_page(url_or_page, options)
  # A copy is handed over so the identifier (including :index) survives
  # intact for a possible retry.
  form = page ? get_form(page, form_identifier.dup) : nil
  response_page = nil

  if form
    # Forms whose action is only a fragment ("#...") post back to the page itself.
    if url_or_page.is_a?(String) && form.action.to_s.start_with?("#")
      form.action = "#{url_or_page}#{form.action}"
    end

    form = reset_radio_buttons(form) if should_reset_radio_buttons
    form = set_form_fields(form, fields)
    button = (submit_identifier.nil? || submit_identifier.eql?(:first)) ? form.buttons.first : form.button_with(submit_identifier)

    begin
      response_page = form.submit(button)
    rescue StandardError => e
      log(:error, "[HttpUtilities::Http::Mechanize::Client] - Failed to submit form. Error: #{e.class.name} - #{e.message}.")
    end
  elsif retries > 0
    log(:info, "[HttpUtilities::Http::Mechanize::Client] - Couldn't find page or form with identifier #{form_identifier.inspect}")
    reset_agent(options)
    # Keep the retry's result; previously it was computed and thrown away, so
    # a successful retry still returned nil.
    response_page = set_form_and_submit(url_or_page, form_identifier, submit_identifier, fields, options, retries - 1)
  end

  response_page
end
|
132
|
+
|
133
|
+
# Unchecks every radio button on +form+.
#
# Returns the form that was passed in; a nil form is returned untouched
# instead of raising.
def reset_radio_buttons(form)
  # The guard has to come first: asking a nil form for its radio buttons
  # raised before the old nil check could take effect.
  return form unless form

  (form.radiobuttons || []).each do |radio_button|
    radio_button.checked = false
  end

  form
end
|
142
|
+
|
143
|
+
# Applies every entry of +fields+ to +form+ through set_form_field.
#
# form   - form to populate (returned unchanged when nil/false).
# fields - Hash of field key => definition; nil or empty means nothing to do.
#
# Returns the form as handed back by the last set_form_field call.
def set_form_fields(form, fields)
  return form unless form && fields && !fields.empty?

  fields.inject(form) do |current_form, (key, value)|
    set_form_field(current_form, key, value)
  end
end
|
152
|
+
|
153
|
+
# Sets a single form control described by +value+.
#
# form  - form to modify.
# key   - name (or other identifier value) of the control.
# value - Hash with :type (:input by default; also :checkbox, :radiobutton,
#         :select, :file_upload), :identifier (attribute used for the lookup,
#         :name by default) and :value or :checked as appropriate. For
#         :input, :fallbacks lists alternative fields to set when the form
#         has no field named +key+.
#
# Unknown types are ignored. Returns +form+.
def set_form_field(form, key, value)
  field_type = value.fetch(:type, :input)
  lookup_attribute = value.fetch(:identifier, :name)

  case field_type
  when :input
    log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting text field #{key} to value #{value[:value]}.")
    if form.has_field?(key.to_s)
      form.field_with(lookup_attribute => key.to_s).value = value[:value].to_s
    else
      set_form_fields(form, value[:fallbacks])
    end
  when :checkbox
    log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting checkbox #{key} to checked: #{value[:checked]}.")
    form.checkbox_with(lookup_attribute => key.to_s).checked = value[:checked]
  when :radiobutton
    log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting radio button #{key} to checked: #{value[:checked]}.")
    form.radiobutton_with(lookup_attribute => key.to_s).checked = value[:checked]
  when :select
    log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting select/dropdown #{key} to value: #{value[:value]}.")
    form.field_with(lookup_attribute => key.to_s).value = value[:value].to_s
  when :file_upload
    log(:info, "[HttpUtilities::Http::Mechanize::Client] - Setting file upload #{key} to value #{value[:value]}.")
    form.file_upload_with(lookup_attribute => key.to_s).file_name = value[:value].to_s
  end

  form
end
|
180
|
+
|
181
|
+
# Returns a parsed document for +page+.
#
# A ::Mechanize::Page supplies its own parser; a ::Mechanize::File has its
# body parsed as UTF-8 HTML with Nokogiri. Anything else yields nil.
def get_parser(page)
  case page
  when ::Mechanize::Page
    page.parser
  when ::Mechanize::File
    Nokogiri::HTML(page.body, nil, "utf-8")
  end
end
|
192
|
+
|
193
|
+
end
|
194
|
+
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module HttpUtilities
|
2
|
+
module Http
|
3
|
+
module Post
|
4
|
+
|
5
|
+
# POSTs +data+ to +url+ and asks for the reply to be parsed as HTML.
#
# Adds :force_encoding => true and :format => :html to a copy of +options+;
# the caller's hash is left untouched. Returns whatever
# post_and_retrieve_content returns.
def post_and_retrieve_parsed_html(url, data, options = {})
  post_and_retrieve_content(url, data, options.merge(:force_encoding => true, :format => :html))
end
|
9
|
+
|
10
|
+
# POSTs +data+ to +url+ and asks for the reply to be parsed as XML.
#
# Adds :force_encoding => true and :format => :xml to a copy of +options+;
# the caller's hash is left untouched. Returns whatever
# post_and_retrieve_content returns.
def post_and_retrieve_parsed_xml(url, data, options = {})
  post_and_retrieve_content(url, data, options.merge(:force_encoding => true, :format => :xml))
end
|
14
|
+
|
15
|
+
# Dispatches a POST to the adapter named by options[:method].
#
# Supported values are :net_http (the default when the key is missing or
# falsy) and :curl; any other value performs no request. The :response_only
# key is stripped before the options reach the adapter. The stripping is
# done on a copy, so the caller's hash is never modified.
#
# Returns the adapter's response, or nil for an unknown method.
def post_and_retrieve_content(url, data, options = {})
  options = options.dup
  options.delete(:response_only)

  adapter = options[:method] || :net_http

  case adapter
  when :net_http then post_and_retrieve_content_using_net_http(url, data, options)
  when :curl     then post_and_retrieve_content_using_curl(url, data, options)
  end
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|