http_utilities 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +22 -0
- data/README +15 -0
- data/Rakefile +87 -0
- data/VERSION +1 -0
- data/http_utilities.gemspec +78 -0
- data/lib/generators/active_record/http_utilities_generator.rb +21 -0
- data/lib/generators/active_record/templates/migration.rb +34 -0
- data/lib/generators/active_record/templates/proxy.rb +3 -0
- data/lib/generators/helpers/file_helper.rb +35 -0
- data/lib/generators/helpers/orm_helpers.rb +15 -0
- data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
- data/lib/generators/templates/http_utilities.rb +2 -0
- data/lib/generators/templates/user_agents.yml +3419 -0
- data/lib/http_utilities/http/adapters/curb.rb +107 -0
- data/lib/http_utilities/http/adapters/net_http.rb +130 -0
- data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
- data/lib/http_utilities/http/client.rb +22 -0
- data/lib/http_utilities/http/cookies.rb +49 -0
- data/lib/http_utilities/http/format.rb +26 -0
- data/lib/http_utilities/http/get.rb +67 -0
- data/lib/http_utilities/http/logger.rb +11 -0
- data/lib/http_utilities/http/mechanize/client.rb +197 -0
- data/lib/http_utilities/http/post.rb +32 -0
- data/lib/http_utilities/http/proxy_support.rb +88 -0
- data/lib/http_utilities/http/request.rb +20 -0
- data/lib/http_utilities/http/response.rb +50 -0
- data/lib/http_utilities/http/url.rb +48 -0
- data/lib/http_utilities/http/user_agent.rb +3380 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
- data/lib/http_utilities/proxies/proxy_module.rb +70 -0
- data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
- data/lib/http_utilities/railtie.rb +11 -0
- data/lib/http_utilities.rb +47 -0
- data/lib/tasks/http_utilities_tasks.rake +19 -0
- data/spec/database.yml.example +10 -0
- data/spec/http_utilities/client_spec.rb +145 -0
- data/spec/http_utilities/mechanize_client_spec.rb +35 -0
- data/spec/http_utilities/proxy_checker_spec.rb +11 -0
- data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
- data/spec/http_utilities/proxy_spec.rb +114 -0
- data/spec/models.rb +6 -0
- data/spec/schema.rb +30 -0
- data/spec/spec_helper.rb +50 -0
- metadata +209 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module HttpUtilities
  module Http
    # Mixin that fills in the host object's +proxy+ hash from request options.
    #
    # The including object must respond to +proxy+ and +proxy=+; the hash
    # stored there uses the keys :host, :port, :protocol, :type, :username
    # and :password.
    module ProxySupport

      # Populates +proxy+ from +options+ and then applies credentials.
      #
      # Recognised option keys (all optional):
      #   :use_proxy         - pick a proxy even when none was given explicitly (default false)
      #   :proxy             - "host:port" String (optional http:// or https:// prefix)
      #                        or a Hash containing :host and :port
      #   :proxy_host, :proxy_port, :proxy_protocol, :proxy_type
      #                      - values written into +proxy+ when it is reset
      #   :reset_proxy       - rebuild +proxy+ from scratch first (default true)
      #   :proxy_username, :proxy_password, :proxy_credentials
      #                      - forwarded to #set_proxy_credentials
      #
      # When no usable :proxy is given and a top-level +Proxy+ class responding
      # to +get_random_proxy+ exists, host and port are taken from the object
      # it returns.
      def set_proxy_options(options = {})
        use_proxy         = options.fetch(:use_proxy, false)
        specific_proxy    = options.fetch(:proxy, nil)
        proxy_username    = options.fetch(:proxy_username, nil)
        proxy_password    = options.fetch(:proxy_password, nil)
        proxy_credentials = options.fetch(:proxy_credentials, nil)
        reset_proxy       = options.fetch(:reset_proxy, true)

        if reset_proxy
          self.proxy = {
            :host     => options.fetch(:proxy_host, nil),
            :port     => options.fetch(:proxy_port, nil),
            :protocol => options.fetch(:proxy_protocol, :http),
            :type     => options.fetch(:proxy_type, :all)
          }
        end

        # Without a reset the accessor may still be nil; every step below
        # indexes into the hash, so make sure one exists.
        self.proxy ||= {}

        if use_proxy || (specific_proxy && !using_proxy?)
          if specific_proxy.is_a?(String)
            # \A anchors to the start of the string (^ would match after any newline).
            parts = specific_proxy.sub(/\Ahttps?:\/\//i, "").split(":")

            if parts.size == 2
              self.proxy[:host] = parts[0]
              self.proxy[:port] = parts[1].to_i
            end

          elsif specific_proxy.is_a?(Hash) && specific_proxy[:host] && specific_proxy[:port]
            # Copy the hash: credentials are written into +proxy+ afterwards
            # and must not leak into the caller's object.
            self.proxy = specific_proxy.dup

          elsif proxy_model_defined?
            proxy_object = Proxy.get_random_proxy(self.proxy[:protocol], self.proxy[:type])

            if proxy_object
              self.proxy[:host] = proxy_object.host
              self.proxy[:port] = proxy_object.port
            end
          end
        end

        set_proxy_credentials(proxy_username, proxy_password, proxy_credentials)
      end

      # Stores a username/password pair in +proxy+ when a proxy is in use and
      # it does not already carry both values.
      #
      # An explicit +proxy_username+/+proxy_password+ pair takes precedence.
      # Otherwise +proxy_credentials+ may be a Hash with :username/:password
      # or a "username:password" String. Only the first colon separates the
      # two parts, so passwords may themselves contain colons. A String with
      # nothing after the colon is ignored.
      def set_proxy_credentials(proxy_username, proxy_password, proxy_credentials)
        return unless using_proxy?
        return if self.proxy[:username] && self.proxy[:password]

        if proxy_username && proxy_password
          self.proxy[:username] = proxy_username
          self.proxy[:password] = proxy_password

        elsif proxy_credentials.is_a?(Hash)
          self.proxy[:username] = proxy_credentials[:username]
          self.proxy[:password] = proxy_credentials[:password]

        elsif proxy_credentials.is_a?(String)
          username, password = proxy_credentials.split(":", 2)

          if username && password && !password.empty?
            self.proxy[:username] = username
            self.proxy[:password] = password
          end
        end
      end

      # True when a top-level +Proxy+ class exists and responds to
      # +get_random_proxy+. Any StandardError raised while resolving the
      # constant is treated as "not defined".
      def proxy_model_defined?
        top_level_class = begin
          Module.const_get("Proxy").is_a?(Class)
        rescue StandardError
          false
        end

        top_level_class && Proxy.respond_to?(:get_random_proxy)
      end

      # True when +proxy+ holds a host and a port greater than zero.
      # The port is coerced with +to_i+ so that String ports such as "3128"
      # are accepted instead of raising on the comparison.
      def using_proxy?
        return false unless self.proxy

        host = self.proxy[:host]
        port = self.proxy[:port]

        !!(host && port && port.to_i > 0)
      end

    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module HttpUtilities
  module Http
    # Holds the per-request state: the interface object, the proxy settings
    # hash, the cookie list and the user agent.
    class Request
      include HttpUtilities::Http::Logger
      include HttpUtilities::Http::ProxySupport
      include HttpUtilities::Http::UserAgent

      attr_accessor :interface, :proxy, :cookies, :user_agent

      # interface - object stored as-is in +interface+ (default nil)
      # proxy     - proxy settings hash (default: a new empty Hash)
      # cookies   - cookie list (default: a new empty Array)
      #
      # Finishes by calling +set_user_agent+, which is not defined in this
      # class (presumably provided by the UserAgent mixin - confirm).
      def initialize(interface = nil, proxy = {}, cookies = [])
        @interface = interface
        @proxy     = proxy
        @cookies   = cookies

        set_user_agent
      end

    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module HttpUtilities
  module Http
    # Wraps a response body together with the request that produced it and,
    # optionally, a parsed representation of that body.
    class Response
      include HttpUtilities::Http::Format
      include HttpUtilities::Http::Logger

      attr_accessor :body, :parsed_body, :page, :format, :request, :force_encoding

      # body    - raw response body (may be nil)
      # request - the originating request object (may be nil)
      # options - :format         => suffix of the +as_<format>+ method used to parse the body (default nil)
      #           :force_encoding => whether to coerce the body to UTF-8 (default true)
      #
      # The options hash is only read, never modified.
      def initialize(body = nil, request = nil, options = {})
        self.body        = body
        self.request     = request
        self.parsed_body = nil

        self.format         = options.fetch(:format, nil)
        self.force_encoding = options.fetch(:force_encoding, true)

        encode if self.force_encoding
        parse_response
      end

      # Re-tags the body as UTF-8 and strips invalid/undefined byte sequences.
      #
      # Works on a copy so the caller's string is not re-tagged in place and
      # frozen strings can be processed. Failures are logged and leave +body+
      # untouched; only StandardError is rescued so that signals and exit
      # requests still propagate.
      def encode
        return unless self.body

        begin
          self.body = self.body.dup.force_encoding('UTF-8').encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
        rescue StandardError => e
          log(:error, "[HttpUtilities::Http::Format] - Failed to convert response with String#encode. Error: #{e.class.name}. Message: #{e.message}.")
        end
      end

      # Dispatches to +as_<format>+ when both a body and a format are set.
      # Those methods are not defined in this class (presumably they come from
      # the Format mixin - confirm).
      #
      # NOTE(review): +format+ is interpolated into a method name passed to
      # +send+; callers should not fill it from untrusted input.
      def parse_response
        self.send("as_#{self.format}".to_sym) if self.body && self.format
      end

      # Stores +page+ and, when it exposes a parser, takes both the body
      # (+page.parser.content+) and the parsed body (+page.parser+) from it.
      def set_page(page)
        self.page = page

        if page && page.parser
          self.body        = page.parser.content
          self.parsed_body = page.parser
        end
      end

    end
  end
end
|
50
|
+
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'uri'
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
module HttpUtilities
  module Http
    # Helpers for building request URLs and query strings.
    module Url

      # Builds "<url>?<query>" from +params+.
      #
      # The :url entry (String or Symbol key) is taken as the base URL and
      # defaults to "". All remaining entries are sorted by key, percent-encoded
      # with #encode_param and joined with "&".
      #
      # +params+ is not modified: keys are symbolized on a private copy, so the
      # same hash can safely be passed again.
      def generate_request_url(params = {})
        normalized = params.each_with_object({}) do |(key, value), copy|
          copy[key.respond_to?(:to_sym) ? key.to_sym : key] = value
        end

        url   = normalized.delete(:url) { "" }
        query = normalized.sort.map { |key, value| "#{encode_param(key)}=#{encode_param(value)}" }.join("&")

        "#{url}?#{query}"
      end

      # Joins +params+ into "key=value&..." sorted by key. Keys and values are
      # interpolated as-is; no escaping is applied.
      def generate_request_params(params)
        params.sort.map { |key, value| "#{key}=#{value}" }.join("&")
      end

      # Percent-encodes +param+ (via +to_s+) with CGI.escape, then rewrites
      # "+" as "%20" and "%7E" as "~". Returns nil when +param+ is nil or false.
      def encode_param(param)
        return unless param

        CGI.escape(param.to_s).gsub("+", "%20").gsub("%7E", "~")
      end

    end
  end
end
|