http_utilities 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. checksums.yaml +7 -0
  2. data/Gemfile +22 -0
  3. data/README +15 -0
  4. data/Rakefile +87 -0
  5. data/VERSION +1 -0
  6. data/http_utilities.gemspec +78 -0
  7. data/lib/generators/active_record/http_utilities_generator.rb +21 -0
  8. data/lib/generators/active_record/templates/migration.rb +34 -0
  9. data/lib/generators/active_record/templates/proxy.rb +3 -0
  10. data/lib/generators/helpers/file_helper.rb +35 -0
  11. data/lib/generators/helpers/orm_helpers.rb +15 -0
  12. data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
  13. data/lib/generators/templates/http_utilities.rb +2 -0
  14. data/lib/generators/templates/user_agents.yml +3419 -0
  15. data/lib/http_utilities/http/adapters/curb.rb +107 -0
  16. data/lib/http_utilities/http/adapters/net_http.rb +130 -0
  17. data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
  18. data/lib/http_utilities/http/client.rb +22 -0
  19. data/lib/http_utilities/http/cookies.rb +49 -0
  20. data/lib/http_utilities/http/format.rb +26 -0
  21. data/lib/http_utilities/http/get.rb +67 -0
  22. data/lib/http_utilities/http/logger.rb +11 -0
  23. data/lib/http_utilities/http/mechanize/client.rb +197 -0
  24. data/lib/http_utilities/http/post.rb +32 -0
  25. data/lib/http_utilities/http/proxy_support.rb +88 -0
  26. data/lib/http_utilities/http/request.rb +20 -0
  27. data/lib/http_utilities/http/response.rb +50 -0
  28. data/lib/http_utilities/http/url.rb +48 -0
  29. data/lib/http_utilities/http/user_agent.rb +3380 -0
  30. data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
  31. data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
  32. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
  33. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
  34. data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
  35. data/lib/http_utilities/proxies/proxy_module.rb +70 -0
  36. data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
  37. data/lib/http_utilities/railtie.rb +11 -0
  38. data/lib/http_utilities.rb +47 -0
  39. data/lib/tasks/http_utilities_tasks.rake +19 -0
  40. data/spec/database.yml.example +10 -0
  41. data/spec/http_utilities/client_spec.rb +145 -0
  42. data/spec/http_utilities/mechanize_client_spec.rb +35 -0
  43. data/spec/http_utilities/proxy_checker_spec.rb +11 -0
  44. data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
  45. data/spec/http_utilities/proxy_spec.rb +114 -0
  46. data/spec/models.rb +6 -0
  47. data/spec/schema.rb +30 -0
  48. data/spec/spec_helper.rb +50 -0
  49. metadata +209 -0
@@ -0,0 +1,88 @@
1
+ require 'open-uri'
2
+ require 'uri'
3
+
4
module HttpUtilities
  module Http
    # Mixin that decides which proxy (if any) a request object should use and
    # which credentials go with it.
    #
    # The including class must provide a +proxy+ reader and writer; the proxy
    # is stored as a Hash using the keys :host, :port, :protocol, :type,
    # :username and :password.
    module ProxySupport

      # Configures +self.proxy+ from an options hash.
      #
      # Recognised options (all optional):
      #   :use_proxy         - pick a proxy even when no :proxy is given (default false)
      #   :proxy             - "host:port" String (an http:// or https:// prefix is
      #                        stripped) or a Hash containing :host and :port
      #   :proxy_host, :proxy_port, :proxy_protocol (default :http),
      #   :proxy_type (default :all) - seed values used when the proxy is reset
      #   :proxy_username, :proxy_password - explicit credentials
      #   :proxy_credentials - "user:password" String or Hash with :username/:password
      #   :reset_proxy       - rebuild the proxy hash from scratch first (default true)
      #
      # When no usable :proxy is supplied and a top-level Proxy class responding
      # to +get_random_proxy+ exists, a proxy is requested from that class.
      def set_proxy_options(options = {})
        use_proxy         = options.fetch(:use_proxy, false)
        specific_proxy    = options.fetch(:proxy, nil)
        proxy_username    = options.fetch(:proxy_username, nil)
        proxy_password    = options.fetch(:proxy_password, nil)
        proxy_credentials = options.fetch(:proxy_credentials, nil)
        reset_proxy       = options.fetch(:reset_proxy, true)

        if reset_proxy
          self.proxy = {
            :host     => options.fetch(:proxy_host, nil),
            :port     => options.fetch(:proxy_port, nil),
            :protocol => options.fetch(:proxy_protocol, :http),
            :type     => options.fetch(:proxy_type, :all)
          }
        end

        # With :reset_proxy => false the proxy may never have been initialised;
        # make sure the assignments below always have a hash to write into.
        self.proxy ||= {}

        if use_proxy || (specific_proxy && !using_proxy?)
          if specific_proxy.is_a?(String)
            # \A anchors at the start of the string; ^ would also match after a newline.
            address = specific_proxy.gsub(/\Ahttp(s)?:\/\//i, "")
            parts   = address.split(":")

            if parts.size.eql?(2)
              self.proxy[:host] = parts.first
              self.proxy[:port] = parts[1].to_i
            end

          elsif specific_proxy.is_a?(Hash) && !specific_proxy.empty? && specific_proxy[:host] && specific_proxy[:port]
            # Copy the hash: credentials are written into it later and the
            # caller's object must not be modified behind its back.
            self.proxy = specific_proxy.dup

          elsif proxy_model_defined?
            proxy_object = Proxy.get_random_proxy(self.proxy[:protocol], self.proxy[:type])

            if proxy_object
              self.proxy[:host] = proxy_object.host
              self.proxy[:port] = proxy_object.port
            end
          end
        end

        set_proxy_credentials(proxy_username, proxy_password, proxy_credentials)
      end

      # Stores proxy credentials on +self.proxy+ when a proxy is in use and it
      # does not already carry both a username and a password.
      #
      # An explicit username/password pair takes precedence over
      # +proxy_credentials+, which may be a Hash (:username, :password) or a
      # "user:password" String.
      def set_proxy_credentials(proxy_username, proxy_password, proxy_credentials)
        return unless using_proxy?
        return if self.proxy[:username] && self.proxy[:password]

        if proxy_username && proxy_password
          self.proxy[:username] = proxy_username
          self.proxy[:password] = proxy_password

        elsif proxy_credentials.is_a?(Hash)
          self.proxy[:username] = proxy_credentials[:username]
          self.proxy[:password] = proxy_credentials[:password]

        elsif proxy_credentials.is_a?(String)
          # Split on the first colon only so passwords containing ":" survive intact.
          username, password = proxy_credentials.split(":", 2)

          if username && password
            self.proxy[:username] = username
            self.proxy[:password] = password
          end
        end
      end

      # Returns true when a top-level Proxy class exists and responds to
      # +get_random_proxy+, false otherwise.
      def proxy_model_defined?
        model = Object.const_get("Proxy")
        model.is_a?(Class) && model.respond_to?(:get_random_proxy)
      rescue StandardError
        # Best-effort lookup: a missing (or unloadable) constant simply means
        # there is no proxy model to draw from.
        false
      end

      # Returns true when +self.proxy+ has a host and a port greater than zero.
      # The port is coerced with +to_i+, so "8080" and 8080 are treated alike.
      def using_proxy?
        current = self.proxy
        return false unless current

        host = current[:host]
        port = current[:port]

        !!(host && port && port.to_i > 0)
      end

    end
  end
end
@@ -0,0 +1,20 @@
1
module HttpUtilities
  module Http
    # Holds the per-request state: the interface object, the proxy settings
    # hash, the cookies and the user agent.
    #
    # Logging, proxy selection and user-agent handling are mixed in from the
    # Logger, ProxySupport and UserAgent modules.
    class Request
      include HttpUtilities::Http::Logger
      include HttpUtilities::Http::ProxySupport
      include HttpUtilities::Http::UserAgent

      attr_accessor :interface, :proxy, :cookies, :user_agent

      # interface - the object stored in +interface+ (default nil)
      # proxy     - proxy settings hash (default: empty hash)
      # cookies   - cookies collection (default: empty array)
      #
      # After storing the arguments, +set_user_agent+ is invoked
      # (presumably provided by the UserAgent mixin - confirm there).
      def initialize(interface = nil, proxy = {}, cookies = [])
        self.interface, self.proxy, self.cookies = interface, proxy, cookies

        self.set_user_agent
      end

    end
  end
end
@@ -0,0 +1,50 @@
1
module HttpUtilities
  module Http
    # Wraps the body returned for a request, optionally normalising it to
    # UTF-8 and parsing it according to a requested format.
    class Response
      include HttpUtilities::Http::Format
      include HttpUtilities::Http::Logger

      attr_accessor :body, :parsed_body, :page, :format, :request, :force_encoding

      # body    - raw response body (String or nil)
      # request - the request object this response belongs to
      # options - Hash; recognised keys:
      #           :format         - when set, +as_<format>+ is invoked on the body
      #           :force_encoding - normalise the body to UTF-8 (default true)
      #
      # The options hash is cloned first, so the caller's hash is not modified.
      def initialize(body = nil, request = nil, options = {})
        options = options.clone

        self.body        = body
        self.request     = request
        self.parsed_body = nil

        self.format         = options.delete(:format) { nil }
        self.force_encoding = options.delete(:force_encoding) { true }

        encode if self.force_encoding
        parse_response
      end

      # Re-tags the body as UTF-8 and strips byte sequences that are invalid
      # or undefined in that encoding. Failures are logged, not raised, and
      # leave the body as it was.
      def encode
        return unless self.body

        begin
          # Operate on a copy: force_encoding works in place, which would
          # alter the caller's string and raises on a frozen one.
          utf8_body = self.body.dup.force_encoding('UTF-8')
          self.body = utf8_body.encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
        rescue StandardError => e
          # StandardError rather than Exception, so interrupts and exit
          # requests are never swallowed here.
          log(:error, "[HttpUtilities::Http::Format] - Failed to convert response with String#encode. Error: #{e.class.name}. Message: #{e.message}.")
        end
      end

      # Invokes the +as_<format>+ method when both a body and a format are
      # present (these methods are presumably supplied by the Format mixin -
      # confirm there).
      def parse_response
        self.send("as_#{self.format}".to_sym) if self.body && self.format
      end

      # Stores +page+ and, when it exposes a parser, takes the body and the
      # parsed body from that parser.
      def set_page(page)
        self.page = page

        if page && page.parser
          self.body        = page.parser.content
          self.parsed_body = page.parser
        end
      end

    end
  end
end
50
+
@@ -0,0 +1,48 @@
1
+ require 'open-uri'
2
+ require 'uri'
3
+ require 'cgi'
4
+
5
module HttpUtilities
  module Http
    # Helpers for assembling request URLs and query strings.
    module Url

      # Builds a URL with a sorted, percent-encoded query string.
      #
      # params - Hash of query parameters. The base URL is taken from the
      #          :url (or "url") entry; every other entry becomes a
      #          name=value pair. Keys are compared as symbols.
      #
      # The caller's hash is not modified. When there are no parameters the
      # base URL is returned without a trailing "?", and when the base URL
      # already carries a query string the new pairs are appended with "&".
      #
      # Returns the resulting URL as a String.
      def generate_request_url(params = {})
        # Build a symbol-keyed copy instead of symbolising/deleting in place.
        query_params = {}
        (params || {}).each do |key, value|
          query_params[key.respond_to?(:to_sym) ? key.to_sym : key] = value
        end

        url = query_params.delete(:url) { "" }.to_s

        query = query_params.sort.map do |name, value|
          "#{encode_param(name)}=#{encode_param(value)}"
        end.join("&")

        return url if query.empty?

        separator = if !url.include?("?")
          "?"
        elsif url.end_with?("?", "&")
          ""
        else
          "&"
        end

        "#{url}#{separator}#{query}"
      end

      # Joins the given params into a sorted "name=value&..." string.
      # Names and values are inserted as-is, without any escaping.
      def generate_request_params(params)
        params.sort.map { |name, value| "#{name}=#{value}" }.join("&")
      end

      # Percent-encodes a single query component: spaces become %20 (not "+")
      # and "~" is left unescaped. Returns nil for nil; any other value,
      # including false, is encoded from its +to_s+ representation.
      def encode_param(param)
        return nil if param.nil?

        CGI.escape(param.to_s).gsub("+", "%20").gsub("%7E", "~")
      end

    end
  end
end