proxy_fetcher 0.10.2 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +22 -1
 - data/Gemfile +8 -5
 - data/Rakefile +7 -3
 - data/gemfiles/nokogiri.gemfile +8 -6
 - data/gemfiles/oga.gemfile +8 -6
 - data/lib/proxy_fetcher.rb +46 -35
 - data/lib/proxy_fetcher/client/client.rb +10 -3
 - data/lib/proxy_fetcher/client/request.rb +4 -4
 - data/lib/proxy_fetcher/configuration.rb +24 -19
 - data/lib/proxy_fetcher/document.rb +0 -9
 - data/lib/proxy_fetcher/document/adapters.rb +1 -1
 - data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
 - data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
 - data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
 - data/lib/proxy_fetcher/document/node.rb +2 -2
 - data/lib/proxy_fetcher/exceptions.rb +6 -6
 - data/lib/proxy_fetcher/manager.rb +42 -9
 - data/lib/proxy_fetcher/providers/base.rb +43 -22
 - data/lib/proxy_fetcher/providers/free_proxy_list.rb +9 -10
 - data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
 - data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -15
 - data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
 - data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
 - data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
 - data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
 - data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
 - data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
 - data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
 - data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
 - data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
 - data/lib/proxy_fetcher/proxy.rb +16 -4
 - data/lib/proxy_fetcher/utils/http_client.rb +7 -12
 - data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
 - data/lib/proxy_fetcher/utils/proxy_validator.rb +21 -9
 - data/lib/proxy_fetcher/version.rb +3 -3
 - data/proxy_fetcher.gemspec +21 -16
 - data/spec/fixtures/proxies.txt +14 -0
 - data/spec/proxy_fetcher/client/client_spec.rb +72 -57
 - data/spec/proxy_fetcher/configuration_spec.rb +11 -11
 - data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
 - data/spec/proxy_fetcher/document/node_spec.rb +4 -4
 - data/spec/proxy_fetcher/manager_spec.rb +18 -0
 - data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
 - data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
 - data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
 - data/spec/proxy_fetcher/proxy_spec.rb +14 -14
 - data/spec/proxy_fetcher/version_spec.rb +2 -0
 - data/spec/spec_helper.rb +10 -10
 - data/spec/support/manager_examples.rb +21 -21
 - metadata +27 -17
 - data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -58
 - data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
 - data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
 - data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
 - data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
 - data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
 - data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
 
| 
         @@ -13,7 +13,7 @@ module ProxyFetcher 
     | 
|
| 
       13 
13 
     | 
    
         
             
                  # @return [WrongCustomClass]
         
     | 
| 
       14 
14 
     | 
    
         
             
                  #
         
     | 
| 
       15 
15 
     | 
    
         
             
                  def initialize(klass, methods)
         
     | 
| 
       16 
     | 
    
         
            -
                    required_methods = Array(methods).join( 
     | 
| 
      
 16 
     | 
    
         
            +
                    required_methods = Array(methods).join(", ")
         
     | 
| 
       17 
17 
     | 
    
         
             
                    super("#{klass} must respond to [#{required_methods}] class methods!")
         
     | 
| 
       18 
18 
     | 
    
         
             
                  end
         
     | 
| 
       19 
19 
     | 
    
         
             
                end
         
     | 
| 
         @@ -53,7 +53,7 @@ module ProxyFetcher 
     | 
|
| 
       53 
53 
     | 
    
         
             
                  # @return [MaximumRedirectsReached]
         
     | 
| 
       54 
54 
     | 
    
         
             
                  #
         
     | 
| 
       55 
55 
     | 
    
         
             
                  def initialize(*)
         
     | 
| 
       56 
     | 
    
         
            -
                    super( 
     | 
| 
      
 56 
     | 
    
         
            +
                    super("maximum redirects reached")
         
     | 
| 
       57 
57 
     | 
    
         
             
                  end
         
     | 
| 
       58 
58 
     | 
    
         
             
                end
         
     | 
| 
       59 
59 
     | 
    
         | 
| 
         @@ -66,7 +66,7 @@ module ProxyFetcher 
     | 
|
| 
       66 
66 
     | 
    
         
             
                  # @return [MaximumRetriesReached]
         
     | 
| 
       67 
67 
     | 
    
         
             
                  #
         
     | 
| 
       68 
68 
     | 
    
         
             
                  def initialize(*)
         
     | 
| 
       69 
     | 
    
         
            -
                    super( 
     | 
| 
      
 69 
     | 
    
         
            +
                    super("reached the maximum number of retries")
         
     | 
| 
       70 
70 
     | 
    
         
             
                  end
         
     | 
| 
       71 
71 
     | 
    
         
             
                end
         
     | 
| 
       72 
72 
     | 
    
         | 
| 
         @@ -95,7 +95,7 @@ module ProxyFetcher 
     | 
|
| 
       95 
95 
     | 
    
         
             
                    super(<<-MSG.strip.squeeze
         
     | 
| 
       96 
96 
     | 
    
         
             
                      you need to specify adapter for HTML parsing: ProxyFetcher.config.adapter = :nokogiri.
         
     | 
| 
       97 
97 
     | 
    
         
             
                      You can use one of the predefined adapters (:nokogiri or :oga) or your own implementation.
         
     | 
| 
       98 
     | 
    
         
            -
             
     | 
| 
      
 98 
     | 
    
         
            +
                    MSG
         
     | 
| 
       99 
99 
     | 
    
         
             
                    )
         
     | 
| 
       100 
100 
     | 
    
         
             
                  end
         
     | 
| 
       101 
101 
     | 
    
         
             
                end
         
     | 
| 
         @@ -111,7 +111,7 @@ module ProxyFetcher 
     | 
|
| 
       111 
111 
     | 
    
         
             
                  # @return [AdapterSetupError]
         
     | 
| 
       112 
112 
     | 
    
         
             
                  #
         
     | 
| 
       113 
113 
     | 
    
         
             
                  def initialize(adapter_name, error)
         
     | 
| 
       114 
     | 
    
         
            -
                    adapter = demodulize(adapter_name.gsub( 
     | 
| 
      
 114 
     | 
    
         
            +
                    adapter = demodulize(adapter_name.gsub("Adapter", ""))
         
     | 
| 
       115 
115 
     | 
    
         | 
| 
       116 
116 
     | 
    
         
             
                    super("can't setup '#{adapter}' adapter during the following error:\n\t#{error}'")
         
     | 
| 
       117 
117 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -127,7 +127,7 @@ module ProxyFetcher 
     | 
|
| 
       127 
127 
     | 
    
         
             
                  #
         
     | 
| 
       128 
128 
     | 
    
         
             
                  def demodulize(path)
         
     | 
| 
       129 
129 
     | 
    
         
             
                    path = path.to_s
         
     | 
| 
       130 
     | 
    
         
            -
                    index = path.rindex( 
     | 
| 
      
 130 
     | 
    
         
            +
                    index = path.rindex("::")
         
     | 
| 
       131 
131 
     | 
    
         | 
| 
       132 
132 
     | 
    
         
             
                    index ? path[(index + 2)..-1] : path
         
     | 
| 
       133 
133 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -3,6 +3,16 @@ 
     | 
|
| 
       3 
3 
     | 
    
         
             
            module ProxyFetcher
         
     | 
| 
       4 
4 
     | 
    
         
             
              # ProxyFetcher Manager class for interacting with proxy lists from various providers.
         
     | 
| 
       5 
5 
     | 
    
         
             
              class Manager
         
     | 
| 
      
 6 
     | 
    
         
            +
                REFRESHER_LOCK = Mutex.new
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                class << self
         
     | 
| 
      
 9 
     | 
    
         
            +
                  def from_files(files, **options)
         
     | 
| 
      
 10 
     | 
    
         
            +
                    new(**options.merge(files: Array(files)))
         
     | 
| 
      
 11 
     | 
    
         
            +
                  end
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                  alias from_file from_files
         
     | 
| 
      
 14 
     | 
    
         
            +
                end
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
       6 
16 
     | 
    
         
             
                # @!attribute [r] proxies
         
     | 
| 
       7 
17 
     | 
    
         
             
                #   @return [Array<ProxyFetcher::Proxy>] An array of proxies
         
     | 
| 
       8 
18 
     | 
    
         
             
                attr_reader :proxies
         
     | 
| 
         @@ -14,14 +24,17 @@ module ProxyFetcher 
     | 
|
| 
       14 
24 
     | 
    
         
             
                #
         
     | 
| 
       15 
25 
     | 
    
         
             
                # @return [Manager]
         
     | 
| 
       16 
26 
     | 
    
         
             
                #
         
     | 
| 
       17 
     | 
    
         
            -
                def initialize( 
     | 
| 
       18 
     | 
    
         
            -
                  if refresh
         
     | 
| 
       19 
     | 
    
         
            -
                    refresh_list!(filters)
         
     | 
| 
      
 27 
     | 
    
         
            +
                def initialize(**options)
         
     | 
| 
      
 28 
     | 
    
         
            +
                  if options.fetch(:refresh, true)
         
     | 
| 
      
 29 
     | 
    
         
            +
                    refresh_list!(options.fetch(:filters, {}))
         
     | 
| 
       20 
30 
     | 
    
         
             
                  else
         
     | 
| 
       21 
31 
     | 
    
         
             
                    @proxies = []
         
     | 
| 
       22 
32 
     | 
    
         
             
                  end
         
     | 
| 
       23 
33 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
                   
     | 
| 
      
 34 
     | 
    
         
            +
                  files = Array(options.fetch(:file, options.fetch(:files, [])))
         
     | 
| 
      
 35 
     | 
    
         
            +
                  load_proxies_from_files!(files) if files&.any?
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
                  cleanup! if options.fetch(:validate, false)
         
     | 
| 
       25 
38 
     | 
    
         
             
                end
         
     | 
| 
       26 
39 
     | 
    
         | 
| 
       27 
40 
     | 
    
         
             
                # Update current proxy list using configured providers.
         
     | 
| 
         @@ -30,17 +43,17 @@ module ProxyFetcher 
     | 
|
| 
       30 
43 
     | 
    
         
             
                #
         
     | 
| 
       31 
44 
     | 
    
         
             
                def refresh_list!(filters = nil)
         
     | 
| 
       32 
45 
     | 
    
         
             
                  @proxies = []
         
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
46 
     | 
    
         
             
                  threads = []
         
     | 
| 
       35 
     | 
    
         
            -
                  lock = Mutex.new
         
     | 
| 
       36 
47 
     | 
    
         | 
| 
       37 
48 
     | 
    
         
             
                  ProxyFetcher.config.providers.each do |provider_name|
         
     | 
| 
       38 
49 
     | 
    
         
             
                    threads << Thread.new do
         
     | 
| 
      
 50 
     | 
    
         
            +
                      Thread.current.report_on_exception = false
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
       39 
52 
     | 
    
         
             
                      provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
         
     | 
| 
       40 
53 
     | 
    
         
             
                      provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
         
     | 
| 
       41 
54 
     | 
    
         
             
                      provider_proxies = provider.fetch_proxies!(provider_filters)
         
     | 
| 
       42 
55 
     | 
    
         | 
| 
       43 
     | 
    
         
            -
                       
     | 
| 
      
 56 
     | 
    
         
            +
                      REFRESHER_LOCK.synchronize do
         
     | 
| 
       44 
57 
     | 
    
         
             
                        @proxies.concat(provider_proxies)
         
     | 
| 
       45 
58 
     | 
    
         
             
                      end
         
     | 
| 
       46 
59 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -55,7 +68,7 @@ module ProxyFetcher 
     | 
|
| 
       55 
68 
     | 
    
         | 
| 
       56 
69 
     | 
    
         
             
                # Pop just the first proxy (and move it back to the end of the proxy list).
         
     | 
| 
       57 
70 
     | 
    
         
             
                #
         
     | 
| 
       58 
     | 
    
         
            -
                # @return [Proxy]
         
     | 
| 
      
 71 
     | 
    
         
            +
                # @return [ProxyFetcher::Proxy, NilClass]
         
     | 
| 
       59 
72 
     | 
    
         
             
                #   proxy object from the list
         
     | 
| 
       60 
73 
     | 
    
         
             
                #
         
     | 
| 
       61 
74 
     | 
    
         
             
                def get
         
     | 
| 
         @@ -72,7 +85,7 @@ module ProxyFetcher 
     | 
|
| 
       72 
85 
     | 
    
         
             
                # Pop the first valid proxy (and move it back to the end of the proxy list)
         
     | 
| 
       73 
86 
     | 
    
         
             
                # Invalid proxies will be removed from the list
         
     | 
| 
       74 
87 
     | 
    
         
             
                #
         
     | 
| 
       75 
     | 
    
         
            -
                # @return [Proxy]
         
     | 
| 
      
 88 
     | 
    
         
            +
                # @return [ProxyFetcher::Proxy, NilClass]
         
     | 
| 
       76 
89 
     | 
    
         
             
                #   proxy object from the list
         
     | 
| 
       77 
90 
     | 
    
         
             
                #
         
     | 
| 
       78 
91 
     | 
    
         
             
                def get!
         
     | 
| 
         @@ -89,6 +102,26 @@ module ProxyFetcher 
     | 
|
| 
       89 
102 
     | 
    
         | 
| 
       90 
103 
     | 
    
         
             
                alias pop! get!
         
     | 
| 
       91 
104 
     | 
    
         | 
| 
      
 105 
     | 
    
         
            +
                # Loads proxies from files.
         
     | 
| 
      
 106 
     | 
    
         
            +
                #
         
     | 
| 
      
 107 
     | 
    
         
            +
                # @param proxy_files [String, Array<String,Pathname>]
         
     | 
| 
      
 108 
     | 
    
         
            +
                #   file path or list of files to load
         
     | 
| 
      
 109 
     | 
    
         
            +
                #
         
     | 
| 
      
 110 
     | 
    
         
            +
                def load_proxies_from_files!(proxy_files)
         
     | 
| 
      
 111 
     | 
    
         
            +
                  proxy_files = Array(proxy_files)
         
     | 
| 
      
 112 
     | 
    
         
            +
                  return if proxy_files.empty?
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
                  proxy_files.each do |proxy_file|
         
     | 
| 
      
 115 
     | 
    
         
            +
                    File.foreach(proxy_file, chomp: true) do |proxy_string|
         
     | 
| 
      
 116 
     | 
    
         
            +
                      addr, port = proxy_string.split(":", 2)
         
     | 
| 
      
 117 
     | 
    
         
            +
                      port = Integer(port) if port
         
     | 
| 
      
 118 
     | 
    
         
            +
                      @proxies << Proxy.new(addr: addr, port: port)
         
     | 
| 
      
 119 
     | 
    
         
            +
                    end
         
     | 
| 
      
 120 
     | 
    
         
            +
                  end
         
     | 
| 
      
 121 
     | 
    
         
            +
             
     | 
| 
      
 122 
     | 
    
         
            +
                  @proxies.uniq!
         
     | 
| 
      
 123 
     | 
    
         
            +
                end
         
     | 
| 
      
 124 
     | 
    
         
            +
             
     | 
| 
       92 
125 
     | 
    
         
             
                # Clean current proxy list from dead proxies (those that don't respond within the timeout)
         
     | 
| 
       93 
126 
     | 
    
         
             
                #
         
     | 
| 
       94 
127 
     | 
    
         
             
                # @return [Array<ProxyFetcher::Proxy>]
         
     | 
| 
         @@ -6,12 +6,15 @@ module ProxyFetcher 
     | 
|
| 
       6 
6 
     | 
    
         
             
                class Base
         
     | 
| 
       7 
7 
     | 
    
         
             
                  # Loads proxy provider page content, extract proxy list from it
         
     | 
| 
       8 
8 
     | 
    
         
             
                  # and convert every entry to proxy object.
         
     | 
| 
       9 
     | 
    
         
            -
                  def fetch_proxies 
     | 
| 
      
 9 
     | 
    
         
            +
                  def fetch_proxies(filters = {})
         
     | 
| 
       10 
10 
     | 
    
         
             
                    raw_proxies = load_proxy_list(filters)
         
     | 
| 
       11 
11 
     | 
    
         
             
                    proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact
         
     | 
| 
       12 
12 
     | 
    
         
             
                    proxies.reject { |proxy| proxy.addr.nil? }
         
     | 
| 
       13 
13 
     | 
    
         
             
                  end
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
      
 15 
     | 
    
         
            +
                  # For retro-compatibility
         
     | 
| 
      
 16 
     | 
    
         
            +
                  alias fetch_proxies! fetch_proxies
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
       15 
18 
     | 
    
         
             
                  def provider_url
         
     | 
| 
       16 
19 
     | 
    
         
             
                    raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
         
     | 
| 
       17 
20 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -24,10 +27,17 @@ module ProxyFetcher 
     | 
|
| 
       24 
27 
     | 
    
         
             
                    {}
         
     | 
| 
       25 
28 
     | 
    
         
             
                  end
         
     | 
| 
       26 
29 
     | 
    
         | 
| 
      
 30 
     | 
    
         
            +
                  # @return [Hash]
         
     | 
| 
      
 31 
     | 
    
         
            +
                  #   Provider headers required to fetch the proxy list
         
     | 
| 
      
 32 
     | 
    
         
            +
                  #
         
     | 
| 
       27 
33 
     | 
    
         
             
                  def provider_headers
         
     | 
| 
       28 
34 
     | 
    
         
             
                    {}
         
     | 
| 
       29 
35 
     | 
    
         
             
                  end
         
     | 
| 
       30 
36 
     | 
    
         | 
| 
      
 37 
     | 
    
         
            +
                  def xpath
         
     | 
| 
      
 38 
     | 
    
         
            +
                    raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
         
     | 
| 
      
 39 
     | 
    
         
            +
                  end
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
       31 
41 
     | 
    
         
             
                  # Just syntactic sugar to make it easier to call #fetch_proxies! method.
         
     | 
| 
       32 
42 
     | 
    
         
             
                  def self.fetch_proxies!(*args)
         
     | 
| 
       33 
43 
     | 
    
         
             
                    new.fetch_proxies!(*args)
         
     | 
| 
         @@ -37,18 +47,29 @@ module ProxyFetcher 
     | 
|
| 
       37 
47 
     | 
    
         | 
| 
       38 
48 
     | 
    
         
             
                  # Loads raw provider HTML with proxies.
         
     | 
| 
       39 
49 
     | 
    
         
             
                  #
         
     | 
| 
      
 50 
     | 
    
         
            +
                  # @param url [String]
         
     | 
| 
      
 51 
     | 
    
         
            +
                  #   Provider URL
         
     | 
| 
      
 52 
     | 
    
         
            +
                  #
         
     | 
| 
      
 53 
     | 
    
         
            +
                  # @param filters [#to_h]
         
     | 
| 
      
 54 
     | 
    
         
            +
                  #   Provider filters (Hash-like object)
         
     | 
| 
      
 55 
     | 
    
         
            +
                  #
         
     | 
| 
       40 
56 
     | 
    
         
             
                  # @return [String]
         
     | 
| 
       41 
     | 
    
         
            -
                  #   HTML body
         
     | 
| 
      
 57 
     | 
    
         
            +
                  #   HTML body from the response
         
     | 
| 
       42 
58 
     | 
    
         
             
                  #
         
     | 
| 
       43 
59 
     | 
    
         
             
                  def load_html(url, filters = {})
         
     | 
| 
       44 
     | 
    
         
            -
                     
     | 
| 
      
 60 
     | 
    
         
            +
                    unless filters.respond_to?(:to_h)
         
     | 
| 
      
 61 
     | 
    
         
            +
                      raise ArgumentError, "filters must be a Hash or respond to #to_h"
         
     | 
| 
      
 62 
     | 
    
         
            +
                    end
         
     | 
| 
       45 
63 
     | 
    
         | 
| 
       46 
     | 
    
         
            -
                     
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
      
 64 
     | 
    
         
            +
                    if filters&.any?
         
     | 
| 
      
 65 
     | 
    
         
            +
                      # TODO: query for post request?
         
     | 
| 
      
 66 
     | 
    
         
            +
                      uri = URI.parse(url)
         
     | 
| 
      
 67 
     | 
    
         
            +
                      uri.query = URI.encode_www_form(provider_params.merge(filters.to_h))
         
     | 
| 
      
 68 
     | 
    
         
            +
                      url = uri.to_s
         
     | 
| 
      
 69 
     | 
    
         
            +
                    end
         
     | 
| 
       49 
70 
     | 
    
         | 
| 
       50 
71 
     | 
    
         
             
                    ProxyFetcher.config.http_client.fetch(
         
     | 
| 
       51 
     | 
    
         
            -
                       
     | 
| 
      
 72 
     | 
    
         
            +
                      url,
         
     | 
| 
       52 
73 
     | 
    
         
             
                      method: provider_method,
         
     | 
| 
       53 
74 
     | 
    
         
             
                      headers: provider_headers,
         
     | 
| 
       54 
75 
     | 
    
         
             
                      params: provider_params
         
     | 
| 
         @@ -71,29 +92,29 @@ module ProxyFetcher 
     | 
|
| 
       71 
92 
     | 
    
         
             
                    ProxyFetcher::Document.parse(html)
         
     | 
| 
       72 
93 
     | 
    
         
             
                  end
         
     | 
| 
       73 
94 
     | 
    
         | 
| 
      
 95 
     | 
    
         
            +
                  # Fetches HTML content by sending HTTP request to the provider URL and
         
     | 
| 
      
 96 
     | 
    
         
            +
                  # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
         
     | 
| 
      
 97 
     | 
    
         
            +
                  # to return all the proxy entries (HTML nodes).
         
     | 
| 
      
 98 
     | 
    
         
            +
                  #
         
     | 
| 
      
 99 
     | 
    
         
            +
                  # @return [Array<ProxyFetcher::Document::Node>]
         
     | 
| 
      
 100 
     | 
    
         
            +
                  #   Collection of extracted HTML nodes with full proxy info
         
     | 
| 
      
 101 
     | 
    
         
            +
                  #
         
     | 
| 
      
 102 
     | 
    
         
            +
                  def load_proxy_list(filters = {})
         
     | 
| 
      
 103 
     | 
    
         
            +
                    doc = load_document(provider_url, filters)
         
     | 
| 
      
 104 
     | 
    
         
            +
                    doc.xpath(xpath)
         
     | 
| 
      
 105 
     | 
    
         
            +
                  end
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
       74 
107 
     | 
    
         
             
                  def build_proxy(*args)
         
     | 
| 
       75 
108 
     | 
    
         
             
                    to_proxy(*args)
         
     | 
| 
       76 
     | 
    
         
            -
                  rescue StandardError =>  
     | 
| 
      
 109 
     | 
    
         
            +
                  rescue StandardError => e
         
     | 
| 
       77 
110 
     | 
    
         
             
                    ProxyFetcher.logger.warn(
         
     | 
| 
       78 
     | 
    
         
            -
                      "Failed to build Proxy  
     | 
| 
      
 111 
     | 
    
         
            +
                      "Failed to build Proxy for #{self.class.name.split("::").last} " \
         
     | 
| 
      
 112 
     | 
    
         
            +
                      "due to error: #{e.message}"
         
     | 
| 
       79 
113 
     | 
    
         
             
                    )
         
     | 
| 
       80 
114 
     | 
    
         | 
| 
       81 
115 
     | 
    
         
             
                    nil
         
     | 
| 
       82 
116 
     | 
    
         
             
                  end
         
     | 
| 
       83 
117 
     | 
    
         | 
| 
       84 
     | 
    
         
            -
                  # Fetches HTML content by sending HTTP request to the provider URL and
         
     | 
| 
       85 
     | 
    
         
            -
                  # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
         
     | 
| 
       86 
     | 
    
         
            -
                  # to return all the proxy entries (HTML nodes).
         
     | 
| 
       87 
     | 
    
         
            -
                  #
         
     | 
| 
       88 
     | 
    
         
            -
                  # Abstract method. Must be implemented in a descendant class
         
     | 
| 
       89 
     | 
    
         
            -
                  #
         
     | 
| 
       90 
     | 
    
         
            -
                  # @return [Array<Document::Node>]
         
     | 
| 
       91 
     | 
    
         
            -
                  #   list of proxy elements from the providers HTML content
         
     | 
| 
       92 
     | 
    
         
            -
                  #
         
     | 
| 
       93 
     | 
    
         
            -
                  def load_proxy_list(*)
         
     | 
| 
       94 
     | 
    
         
            -
                    raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
         
     | 
| 
       95 
     | 
    
         
            -
                  end
         
     | 
| 
       96 
     | 
    
         
            -
             
     | 
| 
       97 
118 
     | 
    
         
             
                  # Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
         
     | 
| 
       98 
119 
     | 
    
         
             
                  #
         
     | 
| 
       99 
120 
     | 
    
         
             
                  # Abstract method. Must be implemented in a descendant class
         
     | 
| 
         @@ -6,13 +6,12 @@ module ProxyFetcher 
     | 
|
| 
       6 
6 
     | 
    
         
             
                class FreeProxyList < Base
         
     | 
| 
       7 
7 
     | 
    
         
             
                  # Provider URL to fetch proxy list
         
     | 
| 
       8 
8 
     | 
    
         
             
                  def provider_url
         
     | 
| 
       9 
     | 
    
         
            -
                     
     | 
| 
      
 9 
     | 
    
         
            +
                    "https://free-proxy-list.net/"
         
     | 
| 
       10 
10 
     | 
    
         
             
                  end
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
12 
     | 
    
         
             
                  # [NOTE] Doesn't support filtering
         
     | 
| 
       13 
     | 
    
         
            -
                  def  
     | 
| 
       14 
     | 
    
         
            -
                     
     | 
| 
       15 
     | 
    
         
            -
                    doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
         
     | 
| 
      
 13 
     | 
    
         
            +
                  def xpath
         
     | 
| 
      
 14 
     | 
    
         
            +
                    '//table[@id="proxylisttable"]/tbody/tr'
         
     | 
| 
       16 
15 
     | 
    
         
             
                  end
         
     | 
| 
       17 
16 
     | 
    
         | 
| 
       18 
17 
     | 
    
         
             
                  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
         
     | 
| 
         @@ -26,10 +25,10 @@ module ProxyFetcher 
     | 
|
| 
       26 
25 
     | 
    
         
             
                  #
         
     | 
| 
       27 
26 
     | 
    
         
             
                  def to_proxy(html_node)
         
     | 
| 
       28 
27 
     | 
    
         
             
                    ProxyFetcher::Proxy.new.tap do |proxy|
         
     | 
| 
       29 
     | 
    
         
            -
                      proxy.addr = html_node.content_at( 
     | 
| 
       30 
     | 
    
         
            -
                      proxy.port = Integer(html_node.content_at( 
     | 
| 
       31 
     | 
    
         
            -
                      proxy.country = html_node.content_at( 
     | 
| 
       32 
     | 
    
         
            -
                      proxy.anonymity = html_node.content_at( 
     | 
| 
      
 28 
     | 
    
         
            +
                      proxy.addr = html_node.content_at("td[1]")
         
     | 
| 
      
 29 
     | 
    
         
            +
                      proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
         
     | 
| 
      
 30 
     | 
    
         
            +
                      proxy.country = html_node.content_at("td[4]")
         
     | 
| 
      
 31 
     | 
    
         
            +
                      proxy.anonymity = html_node.content_at("td[5]")
         
     | 
| 
       33 
32 
     | 
    
         
             
                      proxy.type = parse_type(html_node)
         
     | 
| 
       34 
33 
     | 
    
         
             
                    end
         
     | 
| 
       35 
34 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -45,8 +44,8 @@ module ProxyFetcher 
     | 
|
| 
       45 
44 
     | 
    
         
             
                  #   Proxy type
         
     | 
| 
       46 
45 
     | 
    
         
             
                  #
         
     | 
| 
       47 
46 
     | 
    
         
             
                  def parse_type(html_node)
         
     | 
| 
       48 
     | 
    
         
            -
                    https = html_node.content_at( 
     | 
| 
       49 
     | 
    
         
            -
                    https 
     | 
| 
      
 47 
     | 
    
         
            +
                    https = html_node.content_at("td[6]")
         
     | 
| 
      
 48 
     | 
    
         
            +
                    https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
         
     | 
| 
       50 
49 
     | 
    
         
             
                  end
         
     | 
| 
       51 
50 
     | 
    
         
             
                end
         
     | 
| 
       52 
51 
     | 
    
         | 
| 
         @@ -0,0 +1,58 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module ProxyFetcher
         
     | 
| 
      
 4 
     | 
    
         
            +
              module Providers
         
     | 
| 
      
 5 
     | 
    
         
            +
                # FreeProxyListSocks provider class.
         
     | 
| 
      
 6 
     | 
    
         
            +
                class FreeProxyListSocks < Base
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # Provider URL to fetch proxy list
         
     | 
| 
      
 8 
     | 
    
         
            +
                  def provider_url
         
     | 
| 
      
 9 
     | 
    
         
            +
                    "https://www.socks-proxy.net/"
         
     | 
| 
      
 10 
     | 
    
         
            +
                  end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                  # [NOTE] Doesn't support filtering
         
     | 
| 
      
 13 
     | 
    
         
            +
                  def xpath
         
     | 
| 
      
 14 
     | 
    
         
            +
                    '//table[@id="proxylisttable"]/tbody/tr'
         
     | 
| 
      
 15 
     | 
    
         
            +
                  end
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
         
     | 
| 
      
 18 
     | 
    
         
            +
                  # object.
         
     | 
| 
      
 19 
     | 
    
         
            +
                  #
         
     | 
| 
      
 20 
     | 
    
         
            +
                  # @param html_node [Object]
         
     | 
| 
      
 21 
     | 
    
         
            +
                  #   HTML node from the <code>ProxyFetcher::Document</code> DOM model.
         
     | 
| 
      
 22 
     | 
    
         
            +
                  #
         
     | 
| 
      
 23 
     | 
    
         
            +
                  # @return [ProxyFetcher::Proxy]
         
     | 
| 
      
 24 
     | 
    
         
            +
                  #   Proxy object
         
     | 
| 
      
 25 
     | 
    
         
            +
                  #
         
     | 
| 
      
 26 
     | 
    
         
            +
                  def to_proxy(html_node)
         
     | 
| 
      
 27 
     | 
    
         
            +
                    ProxyFetcher::Proxy.new.tap do |proxy|
         
     | 
| 
      
 28 
     | 
    
         
            +
                      proxy.addr = html_node.content_at("td[1]")
         
     | 
| 
      
 29 
     | 
    
         
            +
                      proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
         
     | 
| 
      
 30 
     | 
    
         
            +
                      proxy.country = html_node.content_at("td[4]")
         
     | 
| 
      
 31 
     | 
    
         
            +
                      proxy.type = parse_type(html_node)
         
     | 
| 
      
 32 
     | 
    
         
            +
                      proxy.anonymity = html_node.content_at("td[6]")
         
     | 
| 
      
 33 
     | 
    
         
            +
                    end
         
     | 
| 
      
 34 
     | 
    
         
            +
                  end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                  private
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                  # Parses HTML node to extract proxy type.
         
     | 
| 
      
 39 
     | 
    
         
            +
                  #
         
     | 
| 
      
 40 
     | 
    
         
            +
                  # @param html_node [Object]
         
     | 
| 
      
 41 
     | 
    
         
            +
                  #   HTML node from the <code>ProxyFetcher::Document</code> DOM model.
         
     | 
| 
      
 42 
     | 
    
         
            +
                  #
         
     | 
| 
      
 43 
     | 
    
         
            +
                  # @return [String]
         
     | 
| 
      
 44 
     | 
    
         
            +
                  #   Proxy type
         
     | 
| 
      
 45 
     | 
    
         
            +
                  #
         
     | 
| 
      
 46 
     | 
    
         
            +
                  def parse_type(html_node)
         
     | 
| 
      
 47 
     | 
    
         
            +
                    https = html_node.content_at("td[5]")
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
                    return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
         
     | 
| 
      
 50 
     | 
    
         
            +
                    return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                    "Unknown"
         
     | 
| 
      
 53 
     | 
    
         
            +
                  end
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
         
     | 
| 
      
 57 
     | 
    
         
            +
              end
         
     | 
| 
      
 58 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -6,20 +6,12 @@ module ProxyFetcher 
     | 
|
| 
       6 
6 
     | 
    
         
             
                class FreeProxyListSSL < Base
         
     | 
| 
       7 
7 
     | 
    
         
             
                  # Provider URL to fetch proxy list
         
     | 
| 
       8 
8 
     | 
    
         
             
                  def provider_url
         
     | 
| 
       9 
     | 
    
         
            -
                     
     | 
| 
      
 9 
     | 
    
         
            +
                    "https://www.sslproxies.org/"
         
     | 
| 
       10 
10 
     | 
    
         
             
                  end
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
                  # Fetches HTML content by sending HTTP request to the provider URL and
         
     | 
| 
       13 
     | 
    
         
            -
                  # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
         
     | 
| 
       14 
     | 
    
         
            -
                  # to return all the proxy entries (HTML nodes).
         
     | 
| 
       15 
     | 
    
         
            -
                  #
         
     | 
| 
       16 
     | 
    
         
            -
                  # @return [Array<ProxyFetcher::Document::Node>]
         
     | 
| 
       17 
     | 
    
         
            -
                  #   Collection of extracted HTML nodes with full proxy info
         
     | 
| 
       18 
     | 
    
         
            -
                  #
         
     | 
| 
       19 
12 
     | 
    
         
             
                  # [NOTE] Doesn't support filtering
         
     | 
| 
       20 
     | 
    
         
            -
                  def  
     | 
| 
       21 
     | 
    
         
            -
                     
     | 
| 
       22 
     | 
    
         
            -
                    doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
         
     | 
| 
      
 13 
     | 
    
         
            +
                  def xpath
         
     | 
| 
      
 14 
     | 
    
         
            +
                    '//table[@id="proxylisttable"]/tbody/tr'
         
     | 
| 
       23 
15 
     | 
    
         
             
                  end
         
     | 
| 
       24 
16 
     | 
    
         | 
| 
       25 
17 
     | 
    
         
             
                  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
         
     | 
| 
         @@ -33,10 +25,10 @@ module ProxyFetcher 
     | 
|
| 
       33 
25 
     | 
    
         
             
                  #
         
     | 
| 
       34 
26 
     | 
    
         
             
                  def to_proxy(html_node)
         
     | 
| 
       35 
27 
     | 
    
         
             
                    ProxyFetcher::Proxy.new.tap do |proxy|
         
     | 
| 
       36 
     | 
    
         
            -
                      proxy.addr = html_node.content_at( 
     | 
| 
       37 
     | 
    
         
            -
                      proxy.port = Integer(html_node.content_at( 
     | 
| 
       38 
     | 
    
         
            -
                      proxy.country = html_node.content_at( 
     | 
| 
       39 
     | 
    
         
            -
                      proxy.anonymity = html_node.content_at( 
     | 
| 
      
 28 
     | 
    
         
            +
                      proxy.addr = html_node.content_at("td[1]")
         
     | 
| 
      
 29 
     | 
    
         
            +
                      proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
         
     | 
| 
      
 30 
     | 
    
         
            +
                      proxy.country = html_node.content_at("td[4]")
         
     | 
| 
      
 31 
     | 
    
         
            +
                      proxy.anonymity = html_node.content_at("td[5]")
         
     | 
| 
       40 
32 
     | 
    
         
             
                      proxy.type = ProxyFetcher::Proxy::HTTPS
         
     | 
| 
       41 
33 
     | 
    
         
             
                    end
         
     | 
| 
       42 
34 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -0,0 +1,54 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module ProxyFetcher
         
     | 
| 
      
 4 
     | 
    
         
            +
              module Providers
         
     | 
| 
      
 5 
     | 
    
         
            +
                # FreeProxyListUS provider class.
         
     | 
| 
      
 6 
     | 
    
         
            +
                class FreeProxyListUS < Base
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # Provider URL to fetch proxy list
         
     | 
| 
      
 8 
     | 
    
         
            +
                  def provider_url
         
     | 
| 
      
 9 
     | 
    
         
            +
                    "https://www.us-proxy.org/"
         
     | 
| 
      
 10 
     | 
    
         
            +
                  end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                  # [NOTE] Doesn't support filtering
         
     | 
| 
      
 13 
     | 
    
         
            +
                  def xpath
         
     | 
| 
      
 14 
     | 
    
         
            +
                    '//table[@id="proxylisttable"]/tbody/tr'
         
     | 
| 
      
 15 
     | 
    
         
            +
                  end
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
         
     | 
| 
      
 18 
     | 
    
         
            +
                  # object.
         
     | 
| 
      
 19 
     | 
    
         
            +
                  #
         
     | 
| 
      
 20 
     | 
    
         
            +
                  # @param html_node [Object]
         
     | 
| 
      
 21 
     | 
    
         
            +
                  #   HTML node from the <code>ProxyFetcher::Document</code> DOM model.
         
     | 
| 
      
 22 
     | 
    
         
            +
                  #
         
     | 
| 
      
 23 
     | 
    
         
            +
                  # @return [ProxyFetcher::Proxy]
         
     | 
| 
      
 24 
     | 
    
         
            +
                  #   Proxy object
         
     | 
| 
      
 25 
     | 
    
         
            +
                  #
         
     | 
| 
      
 26 
     | 
    
         
            +
                  def to_proxy(html_node)
         
     | 
| 
      
 27 
     | 
    
         
            +
                    ProxyFetcher::Proxy.new.tap do |proxy|
         
     | 
| 
      
 28 
     | 
    
         
            +
                      proxy.addr = html_node.content_at("td[1]")
         
     | 
| 
      
 29 
     | 
    
         
            +
                      proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
         
     | 
| 
      
 30 
     | 
    
         
            +
                      proxy.country = html_node.content_at("td[4]")
         
     | 
| 
      
 31 
     | 
    
         
            +
                      proxy.anonymity = html_node.content_at("td[5]")
         
     | 
| 
      
 32 
     | 
    
         
            +
                      proxy.type = parse_type(html_node)
         
     | 
| 
      
 33 
     | 
    
         
            +
                    end
         
     | 
| 
      
 34 
     | 
    
         
            +
                  end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                  private
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                  # Parses HTML node to extract proxy type.
         
     | 
| 
      
 39 
     | 
    
         
            +
                  #
         
     | 
| 
      
 40 
     | 
    
         
            +
                  # @param html_node [Object]
         
     | 
| 
      
 41 
     | 
    
         
            +
                  #   HTML node from the <code>ProxyFetcher::Document</code> DOM model.
         
     | 
| 
      
 42 
     | 
    
         
            +
                  #
         
     | 
| 
      
 43 
     | 
    
         
            +
                  # @return [String]
         
     | 
| 
      
 44 
     | 
    
         
            +
                  #   Proxy type
         
     | 
| 
      
 45 
     | 
    
         
            +
                  #
         
     | 
| 
      
 46 
     | 
    
         
            +
                  def parse_type(html_node)
         
     | 
| 
      
 47 
     | 
    
         
            +
                    https = html_node.content_at("td[7]")
         
     | 
| 
      
 48 
     | 
    
         
            +
                    https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
         
     | 
| 
      
 49 
     | 
    
         
            +
                  end
         
     | 
| 
      
 50 
     | 
    
         
            +
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
         
     | 
| 
      
 53 
     | 
    
         
            +
              end
         
     | 
| 
      
 54 
     | 
    
         
            +
            end
         
     |