proxy_fetcher 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile +1 -1
- data/lib/proxy_fetcher/configuration.rb +3 -3
- data/lib/proxy_fetcher/manager.rb +38 -7
- data/lib/proxy_fetcher/providers/base.rb +2 -1
- data/lib/proxy_fetcher/providers/gather_proxy.rb +1 -1
- data/lib/proxy_fetcher/providers/xroxy.rb +6 -6
- data/lib/proxy_fetcher/proxy.rb +12 -0
- data/lib/proxy_fetcher/version.rb +1 -1
- data/lib/proxy_fetcher.rb +3 -4
- data/spec/fixtures/proxies.txt +14 -0
- data/spec/proxy_fetcher/manager_spec.rb +18 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '09772e9bc018d8accb01401b2e8b1897804e0a948c67a7eeeef8b1a2e9fcd245'
|
4
|
+
data.tar.gz: 16b49036c2ecdd06e23f53c4fe55982ec7ceb1531e9e223f31590eb83417c4e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 526c8fdcfb1171c09f2e9bf6a4e13dbbc6e837963ce1c1ef7e2e48ed1ec3052e3dbc6a4d04bb33a0b3f083914149c22af4b4238a6c558825a91e3f26e112a378
|
7
|
+
data.tar.gz: 8dddea87295d1825910e4eecb4a5a16836b06e0070cc07f823a4902d6952550ae3b016bfb2e29d0ce8cc16ba3add12ad28a6ab6fec8eccd8a1656e368d88be5d
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,13 @@ Reverse Chronological Order:
|
|
6
6
|
|
7
7
|
* Add your description here
|
8
8
|
|
9
|
+
## `0.13.0` (2020-03-09)
|
10
|
+
|
11
|
+
* Fix GatherProxy provider
|
12
|
+
* Fix XRoxy provider
|
13
|
+
* Allow ability to load proxies from files
|
14
|
+
* Fix Proxy object comparators
|
15
|
+
|
9
16
|
## `0.12.0` (2020-01-28)
|
10
17
|
|
11
18
|
* Fix XRoxy provider
|
data/Gemfile
CHANGED
data/lib/proxy_fetcher/configuration.rb
CHANGED
@@ -51,7 +51,7 @@ module ProxyFetcher
|
|
51
51
|
attr_reader :proxy_validator
|
52
52
|
|
53
53
|
# @!attribute [r] providers
|
54
|
-
# @return [Array<String
|
54
|
+
# @return [Array<String, Symbol>] proxy providers list to be used
|
55
55
|
attr_reader :providers
|
56
56
|
|
57
57
|
# User-Agent string that will be used by the ProxyFetcher HTTP client (to
|
@@ -95,7 +95,7 @@ module ProxyFetcher
|
|
95
95
|
|
96
96
|
# Returns registered providers names.
|
97
97
|
#
|
98
|
-
# @return [Array<String
|
98
|
+
# @return [Array<String, Symbol>]
|
99
99
|
# registered providers names
|
100
100
|
#
|
101
101
|
def registered_providers
|
@@ -144,7 +144,7 @@ module ProxyFetcher
|
|
144
144
|
|
145
145
|
# Setups collection of providers that will be used to fetch proxies.
|
146
146
|
#
|
147
|
-
# @param value [String, Symbol, Array<String
|
147
|
+
# @param value [String, Symbol, Array<String, Symbol>]
|
148
148
|
# provider names
|
149
149
|
#
|
150
150
|
def providers=(value)
|
data/lib/proxy_fetcher/manager.rb
CHANGED
@@ -3,6 +3,16 @@
|
|
3
3
|
module ProxyFetcher
|
4
4
|
# ProxyFetcher Manager class for interacting with proxy lists from various providers.
|
5
5
|
class Manager
|
6
|
+
REFRESHER_LOCK = Mutex.new
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def from_files(files, **options)
|
10
|
+
new(**options.merge(files: Array(files)))
|
11
|
+
end
|
12
|
+
|
13
|
+
alias from_file from_files
|
14
|
+
end
|
15
|
+
|
6
16
|
# @!attribute [r] proxies
|
7
17
|
# @return [Array<ProxyFetcher::Proxy>] An array of proxies
|
8
18
|
attr_reader :proxies
|
@@ -14,14 +24,17 @@ module ProxyFetcher
|
|
14
24
|
#
|
15
25
|
# @return [Manager]
|
16
26
|
#
|
17
|
-
def initialize(
|
18
|
-
if refresh
|
19
|
-
refresh_list!(filters)
|
27
|
+
def initialize(**options)
|
28
|
+
if options.fetch(:refresh, true)
|
29
|
+
refresh_list!(options.fetch(:filters, {}))
|
20
30
|
else
|
21
31
|
@proxies = []
|
22
32
|
end
|
23
33
|
|
24
|
-
|
34
|
+
files = Array(options.fetch(:file, options.fetch(:files, [])))
|
35
|
+
load_proxies_from_files!(files) if files&.any?
|
36
|
+
|
37
|
+
cleanup! if options.fetch(:validate, false)
|
25
38
|
end
|
26
39
|
|
27
40
|
# Update current proxy list using configured providers.
|
@@ -30,9 +43,7 @@ module ProxyFetcher
|
|
30
43
|
#
|
31
44
|
def refresh_list!(filters = nil)
|
32
45
|
@proxies = []
|
33
|
-
|
34
46
|
threads = []
|
35
|
-
lock = Mutex.new
|
36
47
|
|
37
48
|
ProxyFetcher.config.providers.each do |provider_name|
|
38
49
|
threads << Thread.new do
|
@@ -40,7 +51,7 @@ module ProxyFetcher
|
|
40
51
|
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
41
52
|
provider_proxies = provider.fetch_proxies!(provider_filters)
|
42
53
|
|
43
|
-
|
54
|
+
REFRESHER_LOCK.synchronize do
|
44
55
|
@proxies.concat(provider_proxies)
|
45
56
|
end
|
46
57
|
end
|
@@ -89,6 +100,26 @@ module ProxyFetcher
|
|
89
100
|
|
90
101
|
alias pop! get!
|
91
102
|
|
103
|
+
# Loads proxies from files.
|
104
|
+
#
|
105
|
+
# @param proxy_files [String, Array<String,Pathname>]
|
106
|
+
# file path of list of files to load
|
107
|
+
#
|
108
|
+
def load_proxies_from_files!(proxy_files)
|
109
|
+
proxy_files = Array(proxy_files)
|
110
|
+
return if proxy_files.empty?
|
111
|
+
|
112
|
+
proxy_files.each do |proxy_file|
|
113
|
+
File.foreach(proxy_file, chomp: true) do |proxy_string|
|
114
|
+
addr, port = proxy_string.split(":", 2)
|
115
|
+
port = Integer(port) if port
|
116
|
+
@proxies << Proxy.new(addr: addr, port: port)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
@proxies.uniq!
|
121
|
+
end
|
122
|
+
|
92
123
|
# Clean current proxy list from dead proxies (that doesn't respond by timeout)
|
93
124
|
#
|
94
125
|
# @return [Array<ProxyFetcher::Proxy>]
|
data/lib/proxy_fetcher/providers/base.rb
CHANGED
@@ -108,7 +108,8 @@ module ProxyFetcher
|
|
108
108
|
to_proxy(*args)
|
109
109
|
rescue StandardError => e
|
110
110
|
ProxyFetcher.logger.warn(
|
111
|
-
"Failed to build Proxy
|
111
|
+
"Failed to build Proxy for #{self.class.name.split("::").last} " \
|
112
|
+
"due to error: #{e.message}"
|
112
113
|
)
|
113
114
|
|
114
115
|
nil
|
data/lib/proxy_fetcher/providers/xroxy.rb
CHANGED
@@ -24,12 +24,12 @@ module ProxyFetcher
|
|
24
24
|
#
|
25
25
|
def to_proxy(html_node)
|
26
26
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
27
|
-
proxy.addr = html_node.content_at("td[
|
28
|
-
proxy.port = Integer(html_node.content_at("td[
|
29
|
-
proxy.anonymity = html_node.content_at("td[
|
30
|
-
proxy.country = html_node.content_at("td[
|
31
|
-
proxy.response_time = Integer(html_node.content_at("td[
|
32
|
-
proxy.type = html_node.content_at("td[
|
27
|
+
proxy.addr = html_node.content_at("td[1]")
|
28
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
29
|
+
proxy.anonymity = html_node.content_at("td[3]")
|
30
|
+
proxy.country = html_node.content_at("td[5]")
|
31
|
+
proxy.response_time = Integer(html_node.content_at("td[6]"))
|
32
|
+
proxy.type = html_node.content_at("td[3]")
|
33
33
|
end
|
34
34
|
end
|
35
35
|
end
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -104,5 +104,17 @@ module ProxyFetcher
|
|
104
104
|
URI::Generic.build(host: addr, port: port).to_s
|
105
105
|
end
|
106
106
|
end
|
107
|
+
|
108
|
+
def ==(other)
|
109
|
+
other.is_a?(Proxy) && addr == other.addr && port == other.port
|
110
|
+
end
|
111
|
+
|
112
|
+
def eql?(other)
|
113
|
+
hash.eql?(other.hash)
|
114
|
+
end
|
115
|
+
|
116
|
+
def hash
|
117
|
+
[addr.hash, port.hash].hash
|
118
|
+
end
|
107
119
|
end
|
108
120
|
end
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -46,7 +46,6 @@ module ProxyFetcher
|
|
46
46
|
|
47
47
|
# Main ProxyFetcher module.
|
48
48
|
class << self
|
49
|
-
|
50
49
|
##
|
51
50
|
# Returns ProxyFetcher configuration.
|
52
51
|
#
|
@@ -57,9 +56,9 @@ module ProxyFetcher
|
|
57
56
|
# ProxyFetcher.config
|
58
57
|
#
|
59
58
|
# #=> #<ProxyFetcher::Configuration:0x0000000241eec8 @user_agent="Mozilla/5.0, ...", @pool_size=10,
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
59
|
+
# @client_timeout=3, @proxy_validation_timeout=3, @provider_proxies_load_timeout=30,
|
60
|
+
# @http_client=ProxyFetcher::HTTPClient, @proxy_validator=ProxyFetcher::ProxyValidator,
|
61
|
+
# @providers=[:free_proxy_list, ...], @adapter=ProxyFetcher::Document::NokogiriAdapter>
|
63
62
|
#
|
64
63
|
def config
|
65
64
|
@__config_definition_lock__.synchronize do
|
data/spec/fixtures/proxies.txt
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
139.162.59.9:3128
|
2
|
+
176.62.187.158:56351
|
3
|
+
144.217.22.142:8080
|
4
|
+
176.55.108.21:3128
|
5
|
+
157.225.214.251:3128
|
6
|
+
202.51.49.52:48298
|
7
|
+
104.244.75.26:8080
|
8
|
+
163.172.28.20:80
|
9
|
+
187.56.191.12:3128
|
10
|
+
129.194.12.26:80
|
11
|
+
178.128.39.39:8080
|
12
|
+
181.30.28.15:8080
|
13
|
+
51.181.96.185:8080
|
14
|
+
148.134.10.13
|
data/spec/proxy_fetcher/manager_spec.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe ProxyFetcher::Manager do
|
6
|
+
it "can initialize with a proxies from file(s)" do
|
7
|
+
manager = described_class.new(refresh: false, file: "spec/fixtures/proxies.txt")
|
8
|
+
|
9
|
+
expect(manager.proxies.size).to be(14)
|
10
|
+
|
11
|
+
manager = described_class.new(
|
12
|
+
refresh: false,
|
13
|
+
file: ["spec/fixtures/proxies.txt", "spec/fixtures/proxies.txt"]
|
14
|
+
)
|
15
|
+
|
16
|
+
expect(manager.proxies.size).to be(14)
|
17
|
+
end
|
18
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -105,10 +105,12 @@ files:
|
|
105
105
|
- lib/proxy_fetcher/utils/proxy_validator.rb
|
106
106
|
- lib/proxy_fetcher/version.rb
|
107
107
|
- proxy_fetcher.gemspec
|
108
|
+
- spec/fixtures/proxies.txt
|
108
109
|
- spec/proxy_fetcher/client/client_spec.rb
|
109
110
|
- spec/proxy_fetcher/configuration_spec.rb
|
110
111
|
- spec/proxy_fetcher/document/adapters_spec.rb
|
111
112
|
- spec/proxy_fetcher/document/node_spec.rb
|
113
|
+
- spec/proxy_fetcher/manager_spec.rb
|
112
114
|
- spec/proxy_fetcher/providers/base_spec.rb
|
113
115
|
- spec/proxy_fetcher/providers/free_proxy_list_spec.rb
|
114
116
|
- spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb
|