proxy_fetcher 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d57af14d443aa759d7a82f20f68f073d1cf7f864204b9ee70233c8dfad83903c
4
- data.tar.gz: 69fcafad4c583fae0d192cf40d0687377158af6145ad3444e8e74d10e7c98bb7
3
+ metadata.gz: '09772e9bc018d8accb01401b2e8b1897804e0a948c67a7eeeef8b1a2e9fcd245'
4
+ data.tar.gz: 16b49036c2ecdd06e23f53c4fe55982ec7ceb1531e9e223f31590eb83417c4e0
5
5
  SHA512:
6
- metadata.gz: 9023c9bc9b6634e9d8b0e58af0a86c918deff988afbac0fd7872cdc6c85a5fca6b41d92b179687241f2af54d1b0bfbf3e9bd8c97610c7fe3db837b4b91504c66
7
- data.tar.gz: 5da728c2e6d57d458df3aa93ef689c4ab1e892e183a2dc780026643cfcc82e017ecf626dc635d5fdd65b95204be784b83d620c188dba9129c30345d3939a28f6
6
+ metadata.gz: 526c8fdcfb1171c09f2e9bf6a4e13dbbc6e837963ce1c1ef7e2e48ed1ec3052e3dbc6a4d04bb33a0b3f083914149c22af4b4238a6c558825a91e3f26e112a378
7
+ data.tar.gz: 8dddea87295d1825910e4eecb4a5a16836b06e0070cc07f823a4902d6952550ae3b016bfb2e29d0ce8cc16ba3add12ad28a6ab6fec8eccd8a1656e368d88be5d
data/CHANGELOG.md CHANGED
@@ -6,6 +6,13 @@ Reverse Chronological Order:
6
6
 
7
7
  * Add your description here
8
8
 
9
+ ## `0.13.0` (2020-03-09)
10
+
11
+ * Fix GatherProxy provider
12
+ * Fix XRoxy provider
13
+ * Allow ability to load proxies from files
14
+ * Fix Proxy object comparators
15
+
9
16
  ## `0.12.0` (2020-01-28)
10
17
 
11
18
  * Fix XRoxy provider
data/Gemfile CHANGED
@@ -5,7 +5,7 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  gem "nokogiri", "~> 1.8"
8
- gem "oga", "~> 2.0"
8
+ gem "oga", "~> 3.2"
9
9
  gem "rubocop", "~> 0.74"
10
10
 
11
11
  group :test do
@@ -51,7 +51,7 @@ module ProxyFetcher
51
51
  attr_reader :proxy_validator
52
52
 
53
53
  # @!attribute [r] providers
54
- # @return [Array<String>, Array<Symbol>] proxy providers list to be used
54
+ # @return [Array<String, Symbol>] proxy providers list to be used
55
55
  attr_reader :providers
56
56
 
57
57
  # User-Agent string that will be used by the ProxyFetcher HTTP client (to
@@ -95,7 +95,7 @@ module ProxyFetcher
95
95
 
96
96
  # Returns registered providers names.
97
97
  #
98
- # @return [Array<String>, Array<Symbol>]
98
+ # @return [Array<String, Symbol>]
99
99
  # registered providers names
100
100
  #
101
101
  def registered_providers
@@ -144,7 +144,7 @@ module ProxyFetcher
144
144
 
145
145
  # Sets up the collection of providers that will be used to fetch proxies.
146
146
  #
147
- # @param value [String, Symbol, Array<String>, Array<Symbol>]
147
+ # @param value [String, Symbol, Array<String, Symbol>]
148
148
  # provider names
149
149
  #
150
150
  def providers=(value)
@@ -3,6 +3,16 @@
3
3
  module ProxyFetcher
4
4
  # ProxyFetcher Manager class for interacting with proxy lists from various providers.
5
5
  class Manager
6
+ REFRESHER_LOCK = Mutex.new
7
+
8
+ class << self
9
+ def from_files(files, **options)
10
+ new(**options.merge(files: Array(files)))
11
+ end
12
+
13
+ alias from_file from_files
14
+ end
15
+
6
16
  # @!attribute [r] proxies
7
17
  # @return [Array<ProxyFetcher::Proxy>] An array of proxies
8
18
  attr_reader :proxies
@@ -14,14 +24,17 @@ module ProxyFetcher
14
24
  #
15
25
  # @return [Manager]
16
26
  #
17
- def initialize(refresh: true, validate: false, filters: {})
18
- if refresh
19
- refresh_list!(filters)
27
+ def initialize(**options)
28
+ if options.fetch(:refresh, true)
29
+ refresh_list!(options.fetch(:filters, {}))
20
30
  else
21
31
  @proxies = []
22
32
  end
23
33
 
24
- cleanup! if validate
34
+ files = Array(options.fetch(:file, options.fetch(:files, [])))
35
+ load_proxies_from_files!(files) if files&.any?
36
+
37
+ cleanup! if options.fetch(:validate, false)
25
38
  end
26
39
 
27
40
  # Update current proxy list using configured providers.
@@ -30,9 +43,7 @@ module ProxyFetcher
30
43
  #
31
44
  def refresh_list!(filters = nil)
32
45
  @proxies = []
33
-
34
46
  threads = []
35
- lock = Mutex.new
36
47
 
37
48
  ProxyFetcher.config.providers.each do |provider_name|
38
49
  threads << Thread.new do
@@ -40,7 +51,7 @@ module ProxyFetcher
40
51
  provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
41
52
  provider_proxies = provider.fetch_proxies!(provider_filters)
42
53
 
43
- lock.synchronize do
54
+ REFRESHER_LOCK.synchronize do
44
55
  @proxies.concat(provider_proxies)
45
56
  end
46
57
  end
@@ -89,6 +100,26 @@ module ProxyFetcher
89
100
 
90
101
  alias pop! get!
91
102
 
103
+ # Loads proxies from files.
104
+ #
105
+ # @param proxy_files [String, Array<String,Pathname>]
106
+ # file path or list of files to load
107
+ #
108
+ def load_proxies_from_files!(proxy_files)
109
+ proxy_files = Array(proxy_files)
110
+ return if proxy_files.empty?
111
+
112
+ proxy_files.each do |proxy_file|
113
+ File.foreach(proxy_file, chomp: true) do |proxy_string|
114
+ addr, port = proxy_string.split(":", 2)
115
+ port = Integer(port) if port
116
+ @proxies << Proxy.new(addr: addr, port: port)
117
+ end
118
+ end
119
+
120
+ @proxies.uniq!
121
+ end
122
+
92
123
  # Clean current proxy list from dead proxies (that don't respond within the timeout)
93
124
  #
94
125
  # @return [Array<ProxyFetcher::Proxy>]
@@ -108,7 +108,8 @@ module ProxyFetcher
108
108
  to_proxy(*args)
109
109
  rescue StandardError => e
110
110
  ProxyFetcher.logger.warn(
111
- "Failed to build Proxy object for #{self.class.name} due to error: #{e.message}"
111
+ "Failed to build Proxy for #{self.class.name.split("::").last} " \
112
+ "due to error: #{e.message}"
112
113
  )
113
114
 
114
115
  nil
@@ -8,7 +8,7 @@ module ProxyFetcher
8
8
  class GatherProxy < Base
9
9
  # Provider URL to fetch proxy list
10
10
  def provider_url
11
- "http://www.gatherproxy.com/"
11
+ "https://proxygather.com"
12
12
  end
13
13
 
14
14
  def xpath
@@ -24,12 +24,12 @@ module ProxyFetcher
24
24
  #
25
25
  def to_proxy(html_node)
26
26
  ProxyFetcher::Proxy.new.tap do |proxy|
27
- proxy.addr = html_node.content_at("td[2]")
28
- proxy.port = Integer(html_node.content_at("td[3]").gsub(/^0+/, ""))
29
- proxy.anonymity = html_node.content_at("td[4]")
30
- proxy.country = html_node.content_at("td[6]")
31
- proxy.response_time = Integer(html_node.content_at("td[7]"))
32
- proxy.type = html_node.content_at("td[4]")
27
+ proxy.addr = html_node.content_at("td[1]")
28
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
29
+ proxy.anonymity = html_node.content_at("td[3]")
30
+ proxy.country = html_node.content_at("td[5]")
31
+ proxy.response_time = Integer(html_node.content_at("td[6]"))
32
+ proxy.type = html_node.content_at("td[3]")
33
33
  end
34
34
  end
35
35
  end
@@ -104,5 +104,17 @@ module ProxyFetcher
104
104
  URI::Generic.build(host: addr, port: port).to_s
105
105
  end
106
106
  end
107
+
108
+ def ==(other)
109
+ other.is_a?(Proxy) && addr == other.addr && port == other.port
110
+ end
111
+
112
+ def eql?(other)
113
+ hash.eql?(other.hash)
114
+ end
115
+
116
+ def hash
117
+ [addr.hash, port.hash].hash
118
+ end
107
119
  end
108
120
  end
@@ -13,7 +13,7 @@ module ProxyFetcher
13
13
  # Major version number
14
14
  MAJOR = 0
15
15
  # Minor version number
16
- MINOR = 12
16
+ MINOR = 13
17
17
  # Smallest version number
18
18
  TINY = 0
19
19
 
data/lib/proxy_fetcher.rb CHANGED
@@ -46,7 +46,6 @@ module ProxyFetcher
46
46
 
47
47
  # Main ProxyFetcher module.
48
48
  class << self
49
-
50
49
  ##
51
50
  # Returns ProxyFetcher configuration.
52
51
  #
@@ -57,9 +56,9 @@ module ProxyFetcher
57
56
  # ProxyFetcher.config
58
57
  #
59
58
  # #=> #<ProxyFetcher::Configuration:0x0000000241eec8 @user_agent="Mozilla/5.0, ...", @pool_size=10,
60
- # @client_timeout=3, @proxy_validation_timeout=3, @provider_proxies_load_timeout=30,
61
- # @http_client=ProxyFetcher::HTTPClient, @proxy_validator=ProxyFetcher::ProxyValidator,
62
- # @providers=[:free_proxy_list, ...], @adapter=ProxyFetcher::Document::NokogiriAdapter>
59
+ # @client_timeout=3, @proxy_validation_timeout=3, @provider_proxies_load_timeout=30,
60
+ # @http_client=ProxyFetcher::HTTPClient, @proxy_validator=ProxyFetcher::ProxyValidator,
61
+ # @providers=[:free_proxy_list, ...], @adapter=ProxyFetcher::Document::NokogiriAdapter>
63
62
  #
64
63
  def config
65
64
  @__config_definition_lock__.synchronize do
@@ -0,0 +1,14 @@
1
+ 139.162.59.9:3128
2
+ 176.62.187.158:56351
3
+ 144.217.22.142:8080
4
+ 176.55.108.21:3128
5
+ 157.225.214.251:3128
6
+ 202.51.49.52:48298
7
+ 104.244.75.26:8080
8
+ 163.172.28.20:80
9
+ 187.56.191.12:3128
10
+ 129.194.12.26:80
11
+ 178.128.39.39:8080
12
+ 181.30.28.15:8080
13
+ 51.181.96.185:8080
14
+ 148.134.10.13
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe ProxyFetcher::Manager do
6
+ it "can initialize with a proxies from file(s)" do
7
+ manager = described_class.new(refresh: false, file: "spec/fixtures/proxies.txt")
8
+
9
+ expect(manager.proxies.size).to be(14)
10
+
11
+ manager = described_class.new(
12
+ refresh: false,
13
+ file: ["spec/fixtures/proxies.txt", "spec/fixtures/proxies.txt"]
14
+ )
15
+
16
+ expect(manager.proxies.size).to be(14)
17
+ end
18
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxy_fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Bulai
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-28 00:00:00.000000000 Z
11
+ date: 2020-03-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http
@@ -105,10 +105,12 @@ files:
105
105
  - lib/proxy_fetcher/utils/proxy_validator.rb
106
106
  - lib/proxy_fetcher/version.rb
107
107
  - proxy_fetcher.gemspec
108
+ - spec/fixtures/proxies.txt
108
109
  - spec/proxy_fetcher/client/client_spec.rb
109
110
  - spec/proxy_fetcher/configuration_spec.rb
110
111
  - spec/proxy_fetcher/document/adapters_spec.rb
111
112
  - spec/proxy_fetcher/document/node_spec.rb
113
+ - spec/proxy_fetcher/manager_spec.rb
112
114
  - spec/proxy_fetcher/providers/base_spec.rb
113
115
  - spec/proxy_fetcher/providers/free_proxy_list_spec.rb
114
116
  - spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb