get_proxy_list 0.0.4 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile +0 -2
- data/README.md +3 -3
- data/{get_proxy_list.gemspec → getproxylist.gemspec} +3 -3
- data/lib/debug.rb +12 -0
- data/lib/debug.rb~ +12 -0
- data/lib/get_proxy_list.rb +4 -5
- data/lib/get_proxy_list/base.rb +2 -3
- data/lib/get_proxy_list/from_proxy_cn.rb +32 -14
- data/lib/get_proxy_list/from_proxy_cn.rb~ +78 -0
- data/lib/get_proxy_list/from_proxycn.rb +1 -1
- data/lib/get_proxy_list/version.rb +2 -2
- data/lib/getproxylist.rb~ +14 -0
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NWU5MDYwYjNhZGYzMTgyN2Y4M2EzMWMwOTliMDliNmJhMGQyMTM2Mw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MGMyZTI2YjBjYmExYzZiZTNiOGYzMDkyYWY0ZWNiMGM5MGQzMDEyNw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NjU1OWRiY2QwODczNDNkMmMxMmYzZGIzN2M2MWI5YTEyM2I1YTI5YjY1Yjg3
|
10
|
+
NjI3Njg4ZDYxNjVjMzA3NjVlZmIyNjkxNWQ5MjQ3MzczOTliMWI0MDIzNmNj
|
11
|
+
ZjRiNTlkYzhiYmUwMDBjN2VjYTVkYzU0ZDIyMDQxOGQxYTlhMjU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MzhjYjVkNmJkOGYzMDMzZGU0YmU1MGM4ZDQ4M2UwZjY4MTE5YzJmYWJiNmU0
|
14
|
+
ZmZmMGRjYTI1ZjQ1M2U1NzFlYjZkNzNiOTNiNzFjZWFkOWZkY2JkMTVmZTQ2
|
15
|
+
ZTgwY2M1NGFhYjM4YzUyOTdjMjM4NzBiMmRlZTBiMjE5YTY1ZDk=
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Getproxylist
|
2
2
|
|
3
3
|
TODO: Write a gem description
|
4
4
|
|
@@ -6,7 +6,7 @@ TODO: Write a gem description
|
|
6
6
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
|
-
gem '
|
9
|
+
gem 'getproxylist'
|
10
10
|
|
11
11
|
And then execute:
|
12
12
|
|
@@ -14,7 +14,7 @@ And then execute:
|
|
14
14
|
|
15
15
|
Or install it yourself as:
|
16
16
|
|
17
|
-
$ gem install
|
17
|
+
$ gem install getproxylist
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
@@ -5,11 +5,11 @@ require 'get_proxy_list/version'
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "get_proxy_list"
|
8
|
-
spec.version =
|
8
|
+
spec.version = Getproxylist::VERSION
|
9
9
|
spec.authors = ["hahazql"]
|
10
10
|
spec.email = ["hahazhouqunli@gmail.com"]
|
11
|
-
spec.description = %q{
|
12
|
-
spec.summary = %q{
|
11
|
+
spec.description = %q{""}
|
12
|
+
spec.summary = %q{""}
|
13
13
|
spec.homepage = ""
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
data/lib/debug.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.expand_path("../get_proxy_list/version",__FILE__)
|
2
|
+
require File.expand_path("../get_proxy_list/from_proxy_cn",__FILE__)
|
3
|
+
require File.expand_path("../get_proxy_list/base",__FILE__)
|
4
|
+
require File.expand_path("../get_proxy_list", __FILE__)
|
5
|
+
|
6
|
+
# Manual smoke-test harness. Simply loading this file runs a live lookup
# (GetProxyList.get_list with limit 5 over 4 listing pages) and prints how
# many proxies came back, followed by the proxies themselves.
class Debug
  include GetProxyList

  proxies = GetProxyList.get_list(5, 4)
  p "Size: #{proxies.size}"
  p "list: #{proxies}"
end
|
data/lib/debug.rb~
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'getproxylist'
|
2
|
+
require "getproxylist/version"
|
3
|
+
require "getproxylist/from_proxy_cn"
|
4
|
+
require "getproxylist/base"
|
5
|
+
|
6
|
+
# Manual smoke-test harness. Simply loading this file runs a live lookup
# (GetProxyList.get_list with limit 5 over 4 listing pages) and prints how
# many proxies came back, followed by the proxies themselves.
class Debug
  include GetProxyList

  proxies = GetProxyList.get_list(5, 4)
  p "Size: #{proxies.size}"
  p "list: #{proxies}"
end
|
data/lib/get_proxy_list.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
require
|
2
|
-
require "get_proxy_list/
|
3
|
-
require "get_proxy_list/
|
4
|
-
require "get_proxy_list/base"
|
1
|
+
require File.expand_path("../get_proxy_list/version",__FILE__)
|
2
|
+
require File.expand_path("../get_proxy_list/from_proxy_cn",__FILE__)
|
3
|
+
require File.expand_path("../get_proxy_list/base",__FILE__)
|
5
4
|
|
6
5
|
module GetProxyList
|
7
6
|
def self.get_list(limit,page)
|
@@ -11,4 +10,4 @@ module GetProxyList
|
|
11
10
|
proxylist = base.get_proxy_in_time_limit(limit,proxylist)
|
12
11
|
return proxylist
|
13
12
|
end
|
14
|
-
end
|
13
|
+
end
|
data/lib/get_proxy_list/base.rb
CHANGED
@@ -14,8 +14,6 @@ module GetProxyList
|
|
14
14
|
end
|
15
15
|
|
16
16
|
|
17
|
-
|
18
|
-
|
19
17
|
#获取符合时限的代理
|
20
18
|
#limit 时限
|
21
19
|
#proxylist 待筛选的代理列表
|
@@ -27,6 +25,7 @@ module GetProxyList
|
|
27
25
|
begin
|
28
26
|
timeout(limit+1) do
|
29
27
|
doc = Nokogiri::HTML(open("http://www.baidu.com",:proxy=> url))
|
28
|
+
x = doc.css("em")
|
30
29
|
end
|
31
30
|
time_end = Time.now.to_i
|
32
31
|
time_use = time_end - time_start
|
@@ -52,4 +51,4 @@ module GetProxyList
|
|
52
51
|
end
|
53
52
|
|
54
53
|
end
|
55
|
-
end
|
54
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
|
2
|
+
require 'open-uri'
|
3
3
|
|
4
4
|
module GetProxyList
|
5
5
|
class FromProxyCn
|
@@ -15,23 +15,41 @@ module GetProxyList
|
|
15
15
|
#获取代理列表
|
16
16
|
# Downloads one listing page and extracts the proxies found in its tables.
#
# url - String address of the listing page to fetch.
#
# Returns an Array of Hashes shaped {"ip" => String, "port" => String}.
# Rows without a usable first cell (no <td>, empty, the "IP:Port" header, or
# no embedded document.write port script) are skipped instead of raising.
# Network and HTTP errors from open-uri propagate to the caller.
def _getproxylist(url)
  # The page hides each port as letters joined by "+"; this maps them back
  # to digits.
  port_digits = {
    "c" => "1", "a" => "2", "z" => "3", "m" => "4", "b" => "5",
    "w" => "6", "i" => "7", "x" => "8", "l" => "9", "f" => "0"
  }

  # URI#open (from open-uri) rather than Kernel#open: Kernel#open stopped
  # accepting URLs in Ruby 3.0, and the block form closes the handle.
  html = URI.parse(url).open { |io| io.read }
  # The page is served as GBK; transcode and drop anything unconvertible.
  html.force_encoding("gbk")
  html.encode!("utf-8", :undef => :replace, :replace => "", :invalid => :replace)
  doc = Nokogiri::HTML.parse(html)

  proxylist = []
  doc.css('table').each do |table|
    table.css('tr').each do |row|
      cell = row.css('td')[0]
      next if cell.nil?

      text = cell.content
      next if text.empty? || text.eql?("IP:Port")

      # Cell text looks like: 1.2.3.4document.write(":"+c+a+z)
      ip, script = text.split("document.write", 2)
      encoded = script && script.split(":")[1]
      next if encoded.nil?

      # The first "+"-separated token is the leftover quote, not a port letter.
      digits = encoded.delete(")").split("+").drop(1).map { |letter| port_digits[letter] }
      # Fall back to 8080 (as a String, like every decoded port) when the
      # port is missing or contains a letter outside the table.
      port = digits.empty? || digits.include?(nil) ? "8080" : digits.join
      proxylist << { "ip" => ip, "port" => port }
    end
  end
  proxylist
end
|
37
55
|
|
@@ -57,4 +75,4 @@ module GetProxyList
|
|
57
75
|
end
|
58
76
|
|
59
77
|
end
|
60
|
-
end
|
78
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
module GetProxyList
  # Scrapes proxy addresses from the listing pages of the site in #site.
  class FromProxyCn
    include GetProxyList

    # Letter-for-digit substitution the listing pages use to hide port
    # numbers (each port is written as letters joined by "+").
    PORT_DIGITS = {
      "c" => "1", "a" => "2", "z" => "3", "m" => "4", "b" => "5",
      "w" => "6", "i" => "7", "x" => "8", "l" => "9", "f" => "0"
    }.freeze

    # Port reported when the hidden port is missing or uses an unknown letter.
    # Kept as a String so every "port" value has the same type.
    DEFAULT_PORT = "8080".freeze

    # Base URL the listing pages are fetched from.
    attr_reader :site

    def initialize
      @site = "http://www.cnproxy.com"
    end

    # Downloads one listing page and extracts the proxies found in its tables.
    #
    # url - String address of the listing page.
    #
    # Returns an Array of Hashes shaped {"ip" => String, "port" => String}.
    # Rows that carry no proxy (no <td>, empty, the "IP:Port" header, or no
    # embedded port script) are skipped instead of raising.
    # Network and HTTP errors from open-uri propagate to the caller.
    def _getproxylist(url)
      doc = Nokogiri::HTML.parse(fetch_page(url))
      proxylist = []
      doc.css('table').each do |table|
        table.css('tr').each do |row|
          proxy = parse_row(row)
          proxylist << proxy unless proxy.nil?
        end
      end
      proxylist
    end

    # Builds the listing-page URLs for the requested number of pages.
    #
    # page - number of pages to fetch (anything responding to #to_i).
    #
    # Returns an Array of URL Strings, "<site>/proxy1.html" onward.
    def get_url_list(page)
      (1..page.to_i).map { |number| "#{@site}/proxy#{number}.html" }
    end

    # Collects the proxies from the first `page` listing pages.
    #
    # Returns an Array of proxy Hashes with duplicates removed, in first-seen
    # order.
    def get_proxylist(page)
      get_url_list(page).inject([]) { |found, url| found | _getproxylist(url) }
    end

    private

    # Fetches a page and returns its body transcoded from GBK to UTF-8.
    def fetch_page(url)
      # URI#open (from open-uri) rather than Kernel#open: Kernel#open stopped
      # accepting URLs in Ruby 3.0, and the block form closes the handle.
      html = URI.parse(url).open { |io| io.read }
      html.force_encoding("gbk")
      html.encode("utf-8", :undef => :replace, :replace => "", :invalid => :replace)
    end

    # Turns one table row into a proxy Hash, or nil when the row has none.
    def parse_row(row)
      cell = row.css('td')[0]
      return nil if cell.nil?

      text = cell.content
      return nil if text.empty? || text.eql?("IP:Port")

      # Cell text looks like: 1.2.3.4document.write(":"+c+a+z)
      ip, script = text.split("document.write", 2)
      encoded = script && script.split(":")[1]
      return nil if encoded.nil?

      # The first "+"-separated token is the leftover quote, not a port letter.
      { "ip" => ip, "port" => decode_port(encoded.delete(")").split("+").drop(1)) }
    end

    # Maps the hidden port letters back to a digit String.
    def decode_port(letters)
      digits = letters.map { |letter| PORT_DIGITS[letter] }
      return DEFAULT_PORT if digits.empty? || digits.include?(nil)

      digits.join
    end
  end
end
|
@@ -1,3 +1,3 @@
|
|
1
|
-
module
|
2
|
-
VERSION = "0.0.4"
|
1
|
+
# Namespace carrying the gem's release number; the gemspec reads it as
# Getproxylist::VERSION.
module Getproxylist
  # Frozen so the shared version string cannot be mutated in place.
  VERSION = "0.0.7".freeze
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'getproxylist'
|
2
|
+
require "getproxylist/version"
|
3
|
+
require "getproxylist/from_proxy_cn"
|
4
|
+
require "getproxylist/base"
|
5
|
+
|
6
|
+
module GetProxyList
  # Gathers proxies from the listing pages and filters them through
  # Base#get_proxy_in_time_limit.
  #
  # limit - forwarded to Base#get_proxy_in_time_limit; presumably the maximum
  #         response time allowed per proxy (confirm against Base).
  # page  - number of listing pages to scrape, forwarded to
  #         FromProxyCn#get_proxylist.
  #
  # Returns whatever Base#get_proxy_in_time_limit returns for the scraped list.
  def self.get_list(limit, page)
    scraper = FromProxyCn.new
    checker = Base.new
    checker.get_proxy_in_time_limit(limit, scraper.get_proxylist(page))
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: get_proxy_list
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahazql
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - ! '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
description:
|
41
|
+
description: ! '""'
|
42
42
|
email:
|
43
43
|
- hahazhouqunli@gmail.com
|
44
44
|
executables: []
|
@@ -50,12 +50,16 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- Rakefile
|
53
|
-
-
|
53
|
+
- getproxylist.gemspec
|
54
|
+
- lib/debug.rb
|
55
|
+
- lib/debug.rb~
|
54
56
|
- lib/get_proxy_list.rb
|
55
57
|
- lib/get_proxy_list/base.rb
|
56
58
|
- lib/get_proxy_list/from_proxy_cn.rb
|
59
|
+
- lib/get_proxy_list/from_proxy_cn.rb~
|
57
60
|
- lib/get_proxy_list/from_proxycn.rb
|
58
61
|
- lib/get_proxy_list/version.rb
|
62
|
+
- lib/getproxylist.rb~
|
59
63
|
homepage: ''
|
60
64
|
licenses:
|
61
65
|
- MIT
|
@@ -79,6 +83,6 @@ rubyforge_project:
|
|
79
83
|
rubygems_version: 2.0.0
|
80
84
|
signing_key:
|
81
85
|
specification_version: 4
|
82
|
-
summary:
|
86
|
+
summary: ! '""'
|
83
87
|
test_files: []
|
84
88
|
has_rdoc:
|