proxy_fetcher 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile +1 -1
- data/lib/proxy_fetcher/configuration.rb +3 -3
- data/lib/proxy_fetcher/manager.rb +38 -7
- data/lib/proxy_fetcher/providers/base.rb +2 -1
- data/lib/proxy_fetcher/providers/gather_proxy.rb +1 -1
- data/lib/proxy_fetcher/providers/xroxy.rb +6 -6
- data/lib/proxy_fetcher/proxy.rb +12 -0
- data/lib/proxy_fetcher/version.rb +1 -1
- data/lib/proxy_fetcher.rb +3 -4
- data/spec/fixtures/proxies.txt +14 -0
- data/spec/proxy_fetcher/manager_spec.rb +18 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '09772e9bc018d8accb01401b2e8b1897804e0a948c67a7eeeef8b1a2e9fcd245'
|
4
|
+
data.tar.gz: 16b49036c2ecdd06e23f53c4fe55982ec7ceb1531e9e223f31590eb83417c4e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 526c8fdcfb1171c09f2e9bf6a4e13dbbc6e837963ce1c1ef7e2e48ed1ec3052e3dbc6a4d04bb33a0b3f083914149c22af4b4238a6c558825a91e3f26e112a378
|
7
|
+
data.tar.gz: 8dddea87295d1825910e4eecb4a5a16836b06e0070cc07f823a4902d6952550ae3b016bfb2e29d0ce8cc16ba3add12ad28a6ab6fec8eccd8a1656e368d88be5d
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,13 @@ Reverse Chronological Order:
|
|
6
6
|
|
7
7
|
* Add your description here
|
8
8
|
|
9
|
+
## `0.13.0` (2020-03-09)
|
10
|
+
|
11
|
+
* Fix GatherProxy provider
|
12
|
+
* Fix XRoxy provider
|
13
|
+
* Allow ability to load proxies from files
|
14
|
+
* Fix Proxy object comparators
|
15
|
+
|
9
16
|
## `0.12.0` (2020-01-28)
|
10
17
|
|
11
18
|
* Fix XRoxy provider
|
data/Gemfile
CHANGED
@@ -51,7 +51,7 @@ module ProxyFetcher
|
|
51
51
|
attr_reader :proxy_validator
|
52
52
|
|
53
53
|
# @!attribute [r] providers
|
54
|
-
# @return [Array<String
|
54
|
+
# @return [Array<String, Symbol>] proxy providers list to be used
|
55
55
|
attr_reader :providers
|
56
56
|
|
57
57
|
# User-Agent string that will be used by the ProxyFetcher HTTP client (to
|
@@ -95,7 +95,7 @@ module ProxyFetcher
|
|
95
95
|
|
96
96
|
# Returns registered providers names.
|
97
97
|
#
|
98
|
-
# @return [Array<String
|
98
|
+
# @return [Array<String, Symbol>]
|
99
99
|
# registered providers names
|
100
100
|
#
|
101
101
|
def registered_providers
|
@@ -144,7 +144,7 @@ module ProxyFetcher
|
|
144
144
|
|
145
145
|
# Setups collection of providers that will be used to fetch proxies.
|
146
146
|
#
|
147
|
-
# @param value [String, Symbol, Array<String
|
147
|
+
# @param value [String, Symbol, Array<String, Symbol>]
|
148
148
|
# provider names
|
149
149
|
#
|
150
150
|
def providers=(value)
|
@@ -3,6 +3,16 @@
|
|
3
3
|
module ProxyFetcher
|
4
4
|
# ProxyFetcher Manager class for interacting with proxy lists from various providers.
|
5
5
|
class Manager
|
6
|
+
REFRESHER_LOCK = Mutex.new
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def from_files(files, **options)
|
10
|
+
new(**options.merge(files: Array(files)))
|
11
|
+
end
|
12
|
+
|
13
|
+
alias from_file from_files
|
14
|
+
end
|
15
|
+
|
6
16
|
# @!attribute [r] proxies
|
7
17
|
# @return [Array<ProxyFetcher::Proxy>] An array of proxies
|
8
18
|
attr_reader :proxies
|
@@ -14,14 +24,17 @@ module ProxyFetcher
|
|
14
24
|
#
|
15
25
|
# @return [Manager]
|
16
26
|
#
|
17
|
-
def initialize(
|
18
|
-
if refresh
|
19
|
-
refresh_list!(filters)
|
27
|
+
def initialize(**options)
|
28
|
+
if options.fetch(:refresh, true)
|
29
|
+
refresh_list!(options.fetch(:filters, {}))
|
20
30
|
else
|
21
31
|
@proxies = []
|
22
32
|
end
|
23
33
|
|
24
|
-
|
34
|
+
files = Array(options.fetch(:file, options.fetch(:files, [])))
|
35
|
+
load_proxies_from_files!(files) if files&.any?
|
36
|
+
|
37
|
+
cleanup! if options.fetch(:validate, false)
|
25
38
|
end
|
26
39
|
|
27
40
|
# Update current proxy list using configured providers.
|
@@ -30,9 +43,7 @@ module ProxyFetcher
|
|
30
43
|
#
|
31
44
|
def refresh_list!(filters = nil)
|
32
45
|
@proxies = []
|
33
|
-
|
34
46
|
threads = []
|
35
|
-
lock = Mutex.new
|
36
47
|
|
37
48
|
ProxyFetcher.config.providers.each do |provider_name|
|
38
49
|
threads << Thread.new do
|
@@ -40,7 +51,7 @@ module ProxyFetcher
|
|
40
51
|
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
41
52
|
provider_proxies = provider.fetch_proxies!(provider_filters)
|
42
53
|
|
43
|
-
|
54
|
+
REFRESHER_LOCK.synchronize do
|
44
55
|
@proxies.concat(provider_proxies)
|
45
56
|
end
|
46
57
|
end
|
@@ -89,6 +100,26 @@ module ProxyFetcher
|
|
89
100
|
|
90
101
|
alias pop! get!
|
91
102
|
|
103
|
+
# Loads proxies from files.
|
104
|
+
#
|
105
|
+
# @param proxy_files [String, Array<String,Pathname>]
|
106
|
+
# file path of list of files to load
|
107
|
+
#
|
108
|
+
def load_proxies_from_files!(proxy_files)
|
109
|
+
proxy_files = Array(proxy_files)
|
110
|
+
return if proxy_files.empty?
|
111
|
+
|
112
|
+
proxy_files.each do |proxy_file|
|
113
|
+
File.foreach(proxy_file, chomp: true) do |proxy_string|
|
114
|
+
addr, port = proxy_string.split(":", 2)
|
115
|
+
port = Integer(port) if port
|
116
|
+
@proxies << Proxy.new(addr: addr, port: port)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
@proxies.uniq!
|
121
|
+
end
|
122
|
+
|
92
123
|
# Clean current proxy list from dead proxies (that doesn't respond by timeout)
|
93
124
|
#
|
94
125
|
# @return [Array<ProxyFetcher::Proxy>]
|
@@ -108,7 +108,8 @@ module ProxyFetcher
|
|
108
108
|
to_proxy(*args)
|
109
109
|
rescue StandardError => e
|
110
110
|
ProxyFetcher.logger.warn(
|
111
|
-
"Failed to build Proxy
|
111
|
+
"Failed to build Proxy for #{self.class.name.split("::").last} " \
|
112
|
+
"due to error: #{e.message}"
|
112
113
|
)
|
113
114
|
|
114
115
|
nil
|
@@ -24,12 +24,12 @@ module ProxyFetcher
|
|
24
24
|
#
|
25
25
|
def to_proxy(html_node)
|
26
26
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
27
|
-
proxy.addr = html_node.content_at("td[
|
28
|
-
proxy.port = Integer(html_node.content_at("td[
|
29
|
-
proxy.anonymity = html_node.content_at("td[
|
30
|
-
proxy.country = html_node.content_at("td[
|
31
|
-
proxy.response_time = Integer(html_node.content_at("td[
|
32
|
-
proxy.type = html_node.content_at("td[
|
27
|
+
proxy.addr = html_node.content_at("td[1]")
|
28
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
29
|
+
proxy.anonymity = html_node.content_at("td[3]")
|
30
|
+
proxy.country = html_node.content_at("td[5]")
|
31
|
+
proxy.response_time = Integer(html_node.content_at("td[6]"))
|
32
|
+
proxy.type = html_node.content_at("td[3]")
|
33
33
|
end
|
34
34
|
end
|
35
35
|
end
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -104,5 +104,17 @@ module ProxyFetcher
|
|
104
104
|
URI::Generic.build(host: addr, port: port).to_s
|
105
105
|
end
|
106
106
|
end
|
107
|
+
|
108
|
+
def ==(other)
|
109
|
+
other.is_a?(Proxy) && addr == other.addr && port == other.port
|
110
|
+
end
|
111
|
+
|
112
|
+
def eql?(other)
|
113
|
+
hash.eql?(other.hash)
|
114
|
+
end
|
115
|
+
|
116
|
+
def hash
|
117
|
+
[addr.hash, port.hash].hash
|
118
|
+
end
|
107
119
|
end
|
108
120
|
end
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -46,7 +46,6 @@ module ProxyFetcher
|
|
46
46
|
|
47
47
|
# Main ProxyFetcher module.
|
48
48
|
class << self
|
49
|
-
|
50
49
|
##
|
51
50
|
# Returns ProxyFetcher configuration.
|
52
51
|
#
|
@@ -57,9 +56,9 @@ module ProxyFetcher
|
|
57
56
|
# ProxyFetcher.config
|
58
57
|
#
|
59
58
|
# #=> #<ProxyFetcher::Configuration:0x0000000241eec8 @user_agent="Mozilla/5.0, ...", @pool_size=10,
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
59
|
+
# @client_timeout=3, @proxy_validation_timeout=3, @provider_proxies_load_timeout=30,
|
60
|
+
# @http_client=ProxyFetcher::HTTPClient, @proxy_validator=ProxyFetcher::ProxyValidator,
|
61
|
+
# @providers=[:free_proxy_list, ...], @adapter=ProxyFetcher::Document::NokogiriAdapter>
|
63
62
|
#
|
64
63
|
def config
|
65
64
|
@__config_definition_lock__.synchronize do
|
@@ -0,0 +1,14 @@
|
|
1
|
+
139.162.59.9:3128
|
2
|
+
176.62.187.158:56351
|
3
|
+
144.217.22.142:8080
|
4
|
+
176.55.108.21:3128
|
5
|
+
157.225.214.251:3128
|
6
|
+
202.51.49.52:48298
|
7
|
+
104.244.75.26:8080
|
8
|
+
163.172.28.20:80
|
9
|
+
187.56.191.12:3128
|
10
|
+
129.194.12.26:80
|
11
|
+
178.128.39.39:8080
|
12
|
+
181.30.28.15:8080
|
13
|
+
51.181.96.185:8080
|
14
|
+
148.134.10.13
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe ProxyFetcher::Manager do
|
6
|
+
it "can initialize with a proxies from file(s)" do
|
7
|
+
manager = described_class.new(refresh: false, file: "spec/fixtures/proxies.txt")
|
8
|
+
|
9
|
+
expect(manager.proxies.size).to be(14)
|
10
|
+
|
11
|
+
manager = described_class.new(
|
12
|
+
refresh: false,
|
13
|
+
file: ["spec/fixtures/proxies.txt", "spec/fixtures/proxies.txt"]
|
14
|
+
)
|
15
|
+
|
16
|
+
expect(manager.proxies.size).to be(14)
|
17
|
+
end
|
18
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -105,10 +105,12 @@ files:
|
|
105
105
|
- lib/proxy_fetcher/utils/proxy_validator.rb
|
106
106
|
- lib/proxy_fetcher/version.rb
|
107
107
|
- proxy_fetcher.gemspec
|
108
|
+
- spec/fixtures/proxies.txt
|
108
109
|
- spec/proxy_fetcher/client/client_spec.rb
|
109
110
|
- spec/proxy_fetcher/configuration_spec.rb
|
110
111
|
- spec/proxy_fetcher/document/adapters_spec.rb
|
111
112
|
- spec/proxy_fetcher/document/node_spec.rb
|
113
|
+
- spec/proxy_fetcher/manager_spec.rb
|
112
114
|
- spec/proxy_fetcher/providers/base_spec.rb
|
113
115
|
- spec/proxy_fetcher/providers/free_proxy_list_spec.rb
|
114
116
|
- spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb
|