http_proxy_pool 0.0.2
- data/README.md +81 -0
- data/Rakefile +9 -0
- data/bin/proxypool +60 -0
- data/lib/http_proxy_pool.rb +25 -0
- data/lib/http_proxy_pool/basetask.rb +81 -0
- data/lib/http_proxy_pool/error.rb +8 -0
- data/lib/http_proxy_pool/example/izmoney_china_hight.site +22 -0
- data/lib/http_proxy_pool/example/izmoney_china_normal.site +22 -0
- data/lib/http_proxy_pool/example/izmoney_foreign_high.site +22 -0
- data/lib/http_proxy_pool/example/izmoney_foreign_normal.site +22 -0
- data/lib/http_proxy_pool/example/kuaidaili_inha.site +36 -0
- data/lib/http_proxy_pool/example/kuaidaili_intr.site +36 -0
- data/lib/http_proxy_pool/example/kuaidaili_outha.site +36 -0
- data/lib/http_proxy_pool/example/kuaidaili_outtr.site +36 -0
- data/lib/http_proxy_pool/example/proxy360.site +21 -0
- data/lib/http_proxy_pool/example/proxy_goubanjia_gngn.site +23 -0
- data/lib/http_proxy_pool/example/proxy_goubanjia_gnpt.site +23 -0
- data/lib/http_proxy_pool/example/proxy_goubanjia_gwgn.site +23 -0
- data/lib/http_proxy_pool/example/proxy_goubanjia_gwpt.site +23 -0
- data/lib/http_proxy_pool/example/xicidaili_nn.site +37 -0
- data/lib/http_proxy_pool/example/xicidaili_nt.site +37 -0
- data/lib/http_proxy_pool/example/xicidaili_qq.site +37 -0
- data/lib/http_proxy_pool/example/xicidaili_wn.site +37 -0
- data/lib/http_proxy_pool/example/xicidaili_wt.site +37 -0
- data/lib/http_proxy_pool/proxy.rb +43 -0
- data/lib/http_proxy_pool/proxy_pool.rb +202 -0
- data/lib/http_proxy_pool/utils.rb +30 -0
- data/lib/http_proxy_pool/version.rb +5 -0
- metadata +86 -0
data/README.md
ADDED
# http-proxy-pool

Tasks such as scraping web pages, batch voting, and batch liking often require rotating IP addresses, which calls for a large pool of proxies. http-proxy-pool collects free proxies from the web for other scripts to use, and it gathers the proxy information through customizable crawling scripts.

## Installation

`gem install http_proxy_pool`

## Usage

##### 1. Command line

* Initialize the proxy data
`proxypool crawl`

* Show the current collection status
`proxypool status`

* Fetch a random usable proxy; by default the proxy is force-checked for availability
`proxypool get`

For more options, see `proxypool help`.

##### 2. From a script

    require 'http_proxy_pool'

    pool = HttpProxyPool::ProxyPool.new
    pool.query(:ip => "=~ /^111/", :proxy_type => "== 'HTTP'") do |proxy|
      # do what you want ...
    end

`query` does not force-check whether the proxies it returns are still usable. Use `checker` to verify one:

    pool.checker(proxy)

## Defining crawling scripts

The default scripts are installed into **[USER\_PATH]/http\_proxy\_pool/script**. You can modify the existing scripts or add new ones to that directory. Crawling scripts for the following sites (found via search engines) ship with the gem:

* [ip.izmoney.com](http://ip.izmoney.com)
* [kuaidaili.com](http://www.kuaidaili.com)
* [proxy360.cn](http://www.proxy360.cn)
* [goubanjia.com](http://proxy.goubanjia.com)

##### An example:

    # the URL to start crawling from
    sitetask("start_page_url") do
      nextpage do
        # nextpage must return the URL of the next page
        # this block should detect whether the current page is the last one
        # if nextpage is not defined, only the first page is crawled
      end

      parser do
        # this block must return an array of proxy field hashes
        # parse the current Mechanize page here and build the fields from the DOM
      end
    end

##### Creating a Proxy:

    HttpProxyPool::Proxy.new(
      :ip => '127.0.0.1',                # IP address
      :port => 8080,                     # port
      :username => 'jiyaping',           # auth username
      :password => 'xxxxxx',             # auth password
      :proxy_level => 'high',            # proxy level (anonymous, transparent)
      :proxy_type => 'http',             # proxy type (HTTP, HTTPS, SOCKS)
      :speed => '0.5',                   # proxy speed
      :added_time => DateTime.now,       # time added
      :last_access_time => DateTime.now, # last time used
      :nation => 'cn',                   # country
      :province => 'guangdong',          # province/state
      :src_from => 'xxxxxx.com'          # source site
    )

## Finally

That's it ...
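A minimal end-to-end sketch of the script API above, assuming `proxypool crawl` has already populated the local pool; the target host `example.com` is a placeholder:

    require 'http_proxy_pool'
    require 'net/http'

    pool  = HttpProxyPool::ProxyPool.new
    # take the first queried proxy that also passes the availability check
    proxy = pool.query(:proxy_type => "=~ /HTTP/i").find { |p| pool.checker(p) }

    if proxy
      # route an ordinary request through the selected proxy
      http = Net::HTTP.new('example.com', 80, proxy.ip, proxy.port)
      puts http.get('/').code
    end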
data/Rakefile
ADDED
data/bin/proxypool
ADDED
#!/usr/bin/env ruby

lib = File.expand_path(File.dirname(__FILE__) + '/../lib')
$LOAD_PATH.unshift(lib) if File.directory?(lib) && !$LOAD_PATH.include?(lib)

require 'thor'
require 'http_proxy_pool'

class HttpProxyPoolApp < Thor
  @@proxy_pool = HttpProxyPool::ProxyPool.new(
      :data_path => File.join(HttpProxyPool.home, 'ips.yaml'),
      :script    => Dir["#{HttpProxyPool.home}/script/*.site"],
      :logger    => HttpProxyPool.logger
    )

  desc 'status', 'show proxy pool status.'
  def status
    @@proxy_pool.status
  end

  desc 'crawl [WAY]', 'gather proxies and store them in a local file via WAY.'
  method_option :lastest, :aliases => '-l',
                          :type    => :boolean,
                          :default => true,
                          :desc    => 'only crawl the most recently published proxies.'
  method_option :check,   :aliases => '-c',
                          :type    => :boolean,
                          :default => false,
                          :desc    => 'check availability before storing.'
  def crawl(way = 'script')
    puts "wait...."

    if way == 'script'
      lastest = options[:lastest]
      check   = options[:check]

      @@proxy_pool.crawling(lastest, check)
    end

    puts "done."
  end

  desc 'get', 'get a proxy from local storage.'
  method_option :force_check, :aliases => '-fc',
                              :type    => :boolean,
                              :default => true,
                              :desc    => 'check that the proxy is ready to use.'
  method_option :thread_num,  :aliases => '-t',
                              :type    => :numeric,
                              :default => 10,
                              :desc    => 'number of search threads.'
  def get
    force_check = options[:force_check]
    thread_num  = options[:thread_num]

    puts @@proxy_pool.get_random_proxy(force_check, thread_num)
  end
end

HttpProxyPoolApp.start
data/lib/http_proxy_pool.rb
ADDED
# encoding: utf-8

# stdlib dependencies used across the gem
require 'logger'
require 'date'
require 'yaml'
require 'fileutils'
require 'net/http'

require 'mechanize'

require 'http_proxy_pool/error'
require 'http_proxy_pool/utils'
require 'http_proxy_pool/basetask'
require 'http_proxy_pool/proxy'
require 'http_proxy_pool/proxy_pool'
require 'http_proxy_pool/version'

module HttpProxyPool
  # will support some configuration options later
  @config = {}

  @home = File.join(Dir.home, 'http_proxy_pool')
  Dir.mkdir(@home) unless Dir.exist? @home

  @script_path = File.join(@home, 'script')
  Dir.mkdir(@script_path) unless Dir.exist? @script_path

  # rotate the log at ~2 MB, keeping one old file
  @logger = Logger.new(File.join(@home, 'proxy.log'), 1, 2_000_000)

  init_default_script
end
data/lib/http_proxy_pool/basetask.rb
ADDED
# encoding: utf-8

module HttpProxyPool
  class Basetask
    attr_accessor :agent,
                  :url,
                  :logger,
                  :page_parser,
                  :next_page

    def initialize(opts = {})
      @agent  = opts[:agent]
      @logger = opts[:logger]
      @url    = opts[:url]
    end

    def sitetask(url, opts = {})
      raise ScriptError.new("script does not specify a url!") unless url

      @url    = url
      @agent  = opts[:agent] || Mechanize.new
      @logger ||= opts[:logger]

      # for debugging through a local capture proxy
      # @agent.set_proxy '127.0.0.1', 8888

      yield
    end

    def ips(lastest = true)
      uri = @url

      loop do
        @logger.info("start crawling page [#{uri}] ...")
        @agent.get(uri)
        # when crawling every page, pause a random interval between requests
        rand_sleep unless lastest

        begin
          instance_eval(&page_parser).each do |field|
            yield field
          end
        rescue => e
          @logger.error("parsing page error[#{uri}]. #{e.to_s}")
          break
        end

        begin
          break unless @next_page
          uri = instance_eval(&next_page)
          break unless uri
        rescue => e
          @logger.error("error occurred when getting next page[#{uri}]. #{e.to_s}")
          break
        end

        break if lastest
      end
    end

    def parser(&block)
      @page_parser = block if block_given?
    end

    def nextpage(&block)
      @next_page = block if block_given?
    end

    def curr_page
      @agent.page.uri
    end

    def sitename
      URI.parse(URI.encode(@url)).host
    end

    def rand_sleep(max_tick = 2)
      sleep rand(max_tick)
    end
  end
end
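Each `.site` script below is plain Ruby that `ProxyPool#crawling` evaluates with `instance_eval` against a `Basetask`, so `sitetask`, `nextpage`, and `parser` are ordinary method calls on the task. A minimal hypothetical script; the URL and the CSS selectors are placeholders, not a real source:

    sitetask("http://proxy-list.example/index.html") do
      parser do
        agent.page.search("table tr").map do |row|
          { :ip         => row.at(".ip").text,    # placeholder selector
            :port       => row.at(".port").text,  # placeholder selector
            :added_time => DateTime.now,
            :src_from   => sitename }
        end
      end
    end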
data/lib/http_proxy_pool/example/izmoney_china_hight.site
ADDED
sitetask("http://ip.izmoney.com/search/china/high/index.html") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:nation]     = tds[2].text
      fields[:proxy_level]= tds[4].text
      fields[:proxy_type] = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/izmoney_china_normal.site
ADDED
sitetask("http://ip.izmoney.com/search/china/normal/index.html") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:nation]     = tds[2].text
      fields[:proxy_level]= tds[4].text
      fields[:proxy_type] = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/izmoney_foreign_high.site
ADDED
sitetask("http://ip.izmoney.com/search/foreign/high/index.html") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:nation]     = tds[2].text
      fields[:proxy_level]= tds[4].text
      fields[:proxy_type] = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/izmoney_foreign_normal.site
ADDED
sitetask("http://ip.izmoney.com/search/foreign/normal/index.html") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:nation]     = tds[2].text
      fields[:proxy_level]= tds[4].text
      fields[:proxy_type] = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/kuaidaili_inha.site
ADDED
sitetask("http://www.kuaidaili.com/free/inha/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.active')
      curr_idx = agent.page.at('.active').text.to_i
    end

    last_page = agent.page.at("#listnav").search("a[href*='/free']").last.text.to_i
    # `next` (not `return`) so the stored block yields nil on the last page
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:speed]      = tds[5].text
      fields[:added_time] = tds[6].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/kuaidaili_intr.site
ADDED
sitetask("http://www.kuaidaili.com/free/intr/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.active')
      curr_idx = agent.page.at('.active').text.to_i
    end

    last_page = agent.page.at("#listnav").search("a[href*='/free']").last.text.to_i
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:speed]      = tds[5].text
      fields[:added_time] = tds[6].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/kuaidaili_outha.site
ADDED
sitetask("http://www.kuaidaili.com/free/outha/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.active')
      curr_idx = agent.page.at('.active').text.to_i
    end

    last_page = agent.page.at("#listnav").search("a[href*='/free']").last.text.to_i
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:speed]      = tds[5].text
      fields[:added_time] = tds[6].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/kuaidaili_outtr.site
ADDED
sitetask("http://www.kuaidaili.com/free/outtr/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.active')
      curr_idx = agent.page.at('.active').text.to_i
    end

    last_page = agent.page.at("#listnav").search("a[href*='/free']").last.text.to_i
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:speed]      = tds[5].text
      fields[:added_time] = tds[6].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/proxy360.site
ADDED
sitetask("http://www.proxy360.cn/default.aspx") do
  parser do
    ips = []

    agent.page.search(".proxylistitem").each do |node|
      tds    = node.search('.tbBottomLine')
      fields = {}

      fields[:ip]         = tds[0].text.strip
      fields[:port]       = tds[1].text.strip
      fields[:proxy_level]= tds[2].text.strip
      fields[:nation]     = tds[3].text.strip
      fields[:added_time] = tds[4].text.strip
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/proxy_goubanjia_gngn.site
ADDED
sitetask("http://proxy.goubanjia.com/free/gngn/index.shtml") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].search(":not(p[style='display: none;'])").text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:nation]     = tds[4].text
      fields[:province]   = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/proxy_goubanjia_gnpt.site
ADDED
sitetask("http://proxy.goubanjia.com/free/gnpt/index.shtml") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].search(":not(p[style='display: none;'])").text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:nation]     = tds[4].text
      fields[:province]   = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/proxy_goubanjia_gwgn.site
ADDED
sitetask("http://proxy.goubanjia.com/free/gwgn/index.shtml") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].search(":not(p[style='display: none;'])").text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:nation]     = tds[4].text
      fields[:province]   = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/proxy_goubanjia_gwpt.site
ADDED
sitetask("http://proxy.goubanjia.com/free/gwpt/index.shtml") do
  parser do
    ips = []

    agent.page.search("tbody").search("tr").each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:ip]         = tds[0].search(":not(p[style='display: none;'])").text
      fields[:port]       = tds[1].text
      fields[:proxy_level]= tds[2].text
      fields[:proxy_type] = tds[3].text
      fields[:nation]     = tds[4].text
      fields[:province]   = tds[5].text
      fields[:added_time] = DateTime.now
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
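The `:not(p[style='display: none;'])` selector in the goubanjia scripts works around markup obfuscation: the site injects junk digits into the IP cell inside hidden elements, so only text outside them is real. The same idea in isolation with Nokogiri; the HTML fragment is fabricated:

    require 'nokogiri'

    html = "<table><tr><td><span>1.2</span>" \
           "<p style='display: none;'>9.9</p><span>.3.4</span></td></tr></table>"
    cell = Nokogiri::HTML(html).at('td')

    cell.text                                            # => "1.29.9.3.4" (junk included)
    cell.search(":not(p[style='display: none;'])").text  # => "1.2.3.4"    (visible only)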
data/lib/http_proxy_pool/example/xicidaili_nn.site
ADDED
sitetask("http://www.xicidaili.com/nn/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.current')
      curr_idx = agent.page.at('.current').text.to_i
    end

    last_page = agent.page.at(".pagination").search("a[href*='/nn/']").last.text.to_i
    # `next` (not `return`) so the stored block yields nil on the last page
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("#ip_list").search("tr")[1..-1].each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:nation]     = tds[1].at('img')['alt'] if tds[1].at('img')
      fields[:ip]         = tds[2].text
      fields[:port]       = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:proxy_level]= tds[5].text
      fields[:proxy_type] = tds[6].text
      fields[:speed]      = tds[7].at('div')["title"] if tds[7].at('div')
      fields[:added_time] = tds[9].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/xicidaili_nt.site
ADDED
sitetask("http://www.xicidaili.com/nt/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.current')
      curr_idx = agent.page.at('.current').text.to_i
    end

    last_page = agent.page.at(".pagination").search("a[href*='/nt/']").last.text.to_i
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("#ip_list").search("tr")[1..-1].each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:nation]     = tds[1].at('img')['alt'] if tds[1].at('img')
      fields[:ip]         = tds[2].text
      fields[:port]       = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:proxy_level]= tds[5].text
      fields[:proxy_type] = tds[6].text
      fields[:speed]      = tds[7].at('div')["title"] if tds[7].at('div')
      fields[:added_time] = tds[9].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/xicidaili_qq.site
ADDED
sitetask("http://www.xicidaili.com/qq/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.current')
      curr_idx = agent.page.at('.current').text.to_i
    end

    last_page = agent.page.at(".pagination").search("a[href*='/qq/']").last.text.to_i
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("#ip_list").search("tr")[1..-1].each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:nation]     = tds[1].at('img')['alt'] if tds[1].at('img')
      fields[:ip]         = tds[2].text
      fields[:port]       = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:proxy_level]= tds[5].text
      fields[:proxy_type] = tds[6].text
      fields[:speed]      = tds[7].at('div')["title"] if tds[7].at('div')
      fields[:added_time] = tds[9].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/xicidaili_wn.site
ADDED
sitetask("http://www.xicidaili.com/wn/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.current')
      curr_idx = agent.page.at('.current').text.to_i
    end

    last_page = agent.page.at(".pagination").search("a[href*='/wn/']").last.text.to_i
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("#ip_list").search("tr")[1..-1].each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:nation]     = tds[1].at('img')['alt'] if tds[1].at('img')
      fields[:ip]         = tds[2].text
      fields[:port]       = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:proxy_level]= tds[5].text
      fields[:proxy_type] = tds[6].text
      fields[:speed]      = tds[7].at('div')["title"] if tds[7].at('div')
      fields[:added_time] = tds[9].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/example/xicidaili_wt.site
ADDED
sitetask("http://www.xicidaili.com/wt/") do
  nextpage do
    curr_idx = 0

    if agent.page.at('.current')
      curr_idx = agent.page.at('.current').text.to_i
    end

    last_page = agent.page.at(".pagination").search("a[href*='/wt/']").last.text.to_i
    next if curr_idx == last_page

    File.join(url, (curr_idx + 1).to_s)
  end

  parser do
    ips = []

    agent.page.search("#ip_list").search("tr")[1..-1].each do |node|
      tds    = node.search('td')
      fields = {}

      fields[:nation]     = tds[1].at('img')['alt'] if tds[1].at('img')
      fields[:ip]         = tds[2].text
      fields[:port]       = tds[3].text
      fields[:province]   = tds[4].at('a').text if tds[4].at('a')
      fields[:proxy_level]= tds[5].text
      fields[:proxy_type] = tds[6].text
      fields[:speed]      = tds[7].at('div')["title"] if tds[7].at('div')
      fields[:added_time] = tds[9].text
      fields[:src_from]   = sitename

      ips << fields
    end

    ips
  end
end
data/lib/http_proxy_pool/proxy.rb
ADDED
# encoding: utf-8

module HttpProxyPool
  class Proxy
    attr_accessor :ip,
                  :port,
                  :username,
                  :password,
                  :proxy_level,
                  :proxy_type,
                  :speed,
                  :added_time,
                  :last_access_time,
                  :nation,
                  :province,
                  :src_from,
                  :try_times

    def initialize(args = {})
      @ip         = args[:ip]
      @port       = args[:port]
      @username   = args[:username] || ''
      @password   = args[:password] || ''
      @proxy_type = args[:proxy_type]
      @proxy_level= args[:proxy_level]
      @speed      = args[:speed]
      @added_time = args[:added_time]
      @last_access_time = args[:last_access_time]
      @nation     = args[:nation]
      @province   = args[:province]
      @src_from   = args[:src_from]
      @try_times  = args[:try_times] || 0
    end

    def to_arr
      [@ip, @port, @proxy_type, @proxy_level, @nation, @province]
    end

    def to_s
      "#{@ip}\t#{@port}"
    end
  end
end
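`Proxy` is a plain value object; note the parentheses when constructing one, since a `{ ... }` literal right after `new` would parse as a block. A short sketch with placeholder values:

    proxy = HttpProxyPool::Proxy.new(
      :ip          => '127.0.0.1',   # placeholder address
      :port        => 8080,
      :proxy_type  => 'HTTP',
      :proxy_level => 'high',
      :src_from    => 'example.com'  # placeholder source
    )

    proxy.to_s    # => "127.0.0.1\t8080"
    proxy.to_arr  # => ["127.0.0.1", 8080, "HTTP", "high", nil, nil]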
data/lib/http_proxy_pool/proxy_pool.rb
ADDED
# encoding: utf-8

module HttpProxyPool
  class ProxyPool
    attr_accessor :proxys, :logger

    def initialize(args = {})
      @data_path = args[:data_path] || File.join(HttpProxyPool.home, 'ips.yaml')
      @script    = args[:script]    || Dir["#{HttpProxyPool.home}/script/*.site"]
      @logger    = args[:logger]    || HttpProxyPool.logger
      @proxys    = []

      @agent = Mechanize.new
      @agent.user_agent_alias = get_agent_alias

      load_proxy if File.exist? @data_path
    end

    def status
      puts "proxy count : #{@proxys.size}"
    end

    # query interface
    def query(args = {})
      begin
        selected_proxy = @proxys.select do |proxy|
          instance_eval(build_query_parameter('proxy', args))
        end
      rescue => e
        raise QueryError.new("query parameter error!")
      end

      return selected_proxy unless block_given?

      selected_proxy.each do |proxy|
        yield proxy
      end
    end

    def build_query_parameter(prefix = 'proxy', args)
      condition_str = ''

      args = query_key_filter(args)

      args.each do |key, express|
        condition_str << "#{prefix}.#{key} #{express} && "
      end

      condition_str.sub!(/\s?&&\s?$/, '')

      condition_str
    end

    def query_key_filter(args)
      proxy = Proxy.new
      args.select{ |k| proxy.respond_to? k }
    end

    def get_random_proxy(check = true, thread_num = 10)
      mutex       = Mutex.new
      result      = nil
      thread_list = []

      begin
        thread_num.times do |thread|
          thread_list << Thread.new do
            while(!result)
              proxy = @proxys[rand(@proxys.size)]
              @logger.info("using #{proxy}.")
              proxy = checker(proxy) if check

              if proxy.is_a? Proxy
                mutex.synchronize do
                  result = proxy
                end
              end
            end
          end
        end

        thread_list.each { |t| t.join }
      rescue => e
        @logger.error("find proxy error. #{e.to_s}")
      ensure
        save_proxy
      end

      result
    end

    def crawling(lastest = true, check = false)
      @script.each do |file|
        begin
          task = Basetask.new(:agent => @agent, :logger => @logger)
          task.instance_eval(read_taskfile(file))

          task.ips(lastest) do |fields|
            proxy = Proxy.new(fields)
            next if check && !checker(proxy)
            @proxys << proxy unless include?(proxy)
          end
        rescue => e
          @logger.error(e)
        ensure
          save_proxy
        end
      end
    end

    def include?(proxy)
      @proxys.select{ |p| p.ip == proxy.ip }.size > 0
    end

    def save_proxy
      file = File.open(@data_path, 'w')
      YAML.dump(@proxys, file)
      file.close
    end

    def load_proxy
      @proxys = YAML.load_file(@data_path)
    end

    def read_taskfile(file)
      cnt = ''
      File.open(file) do |f|
        while(line = f.gets)
          cnt << line
        end
      end

      cnt
    end

    def get_agent_alias
      agent_arr = [
        'Linux Firefox',
        'Linux Mozilla',
        'Mac Firefox',
        'Mac Mozilla',
        'Mac Safari',
        'Windows Chrome',
        'Windows IE 7',
        'Windows IE 8',
        'Windows IE 9',
        'Windows Mozilla',
        'iPhone',
        'iPad',
        'Android']

      agent_arr[rand(agent_arr.size)]
    end

    def checker(proxy)
      if proxy.is_a? Array
        checker_batch(proxy)
      else
        checker_single(proxy)
      end
    end

    def checker_batch(proxys, task_count = 5)
      result  = []
      mutex   = Mutex.new
      threads = []
      thread_count = (proxys.size / task_count.to_f).ceil

      thread_count.times do |thread_idx|
        threads << Thread.new do
          start_idx = thread_idx * task_count
          end_idx   = (thread_idx + 1) * task_count
          end_idx   = proxys.size if end_idx > proxys.size

          proxys[start_idx...end_idx].each do |proxy|
            p = checker_single(proxy)

            mutex.synchronize do
              result << p if p
            end
          end
        end
      end

      # join after spawning all threads so the batches actually run in parallel
      threads.each { |t| t.join }

      result
    end

    def checker_single(proxy, timeout = 0.05)
      http = Net::HTTP.new('baidu.com', 80, proxy.ip, proxy.port)
      http.open_timeout = timeout
      http.read_timeout = timeout * 10

      begin
        return proxy if http.get('/').code =~ /^[1234]/
      rescue => e
        @logger.info("can not connect proxy.[#{proxy}].#{e.to_s}")
        @proxys.delete(proxy)
        @logger.info("deleted disabled proxy [#{proxy}].")
      end

      false
    end
  end
end
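`query` works by compiling its argument hash into one Ruby boolean expression and `instance_eval`ing it against each proxy, which is why every value is an operator plus operand string. A sketch of what `build_query_parameter` produces; since the strings end up eval'd, query arguments should only come from trusted code:

    pool = HttpProxyPool::ProxyPool.new

    pool.build_query_parameter('proxy', :ip => "=~ /^111/", :proxy_type => "== 'HTTP'")
    # => "proxy.ip =~ /^111/ && proxy.proxy_type == 'HTTP'"

    # keys that Proxy instances do not respond to are silently dropped
    pool.build_query_parameter('proxy', :bogus => "== 1")
    # => ""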
data/lib/http_proxy_pool/utils.rb
ADDED
# encoding: utf-8

module HttpProxyPool
  module_function

  def init_default_script
    src_dir = File.join(File.dirname(__FILE__), 'example')

    Dir.entries(src_dir).each do |src|
      next unless src.end_with? '.site'

      # copy each bundled script unless the user already has a file by that name
      target = File.join(@script_path, src)
      FileUtils.cp(File.join(src_dir, src), target) unless File.exist?(target)
    end
  end

  def home
    @home
  end

  def script_path
    @script_path
  end

  def logger
    @logger
  end
end
metadata
ADDED
--- !ruby/object:Gem::Specification
name: http_proxy_pool
version: !ruby/object:Gem::Version
  version: 0.0.2
prerelease:
platform: ruby
authors:
- jiyaping
autorequire:
bindir: bin
cert_chain: []
date: 2015-09-06 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: mechanize
  requirement: &10417392 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '2.7'
  type: :runtime
  prerelease: false
  version_requirements: *10417392
description: Gather free http proxy data
email: jiyaping0802@gmail.com
executables:
- proxypool
extensions: []
extra_rdoc_files: []
files:
- lib/http_proxy_pool/basetask.rb
- lib/http_proxy_pool/error.rb
- lib/http_proxy_pool/example/izmoney_china_hight.site
- lib/http_proxy_pool/example/izmoney_china_normal.site
- lib/http_proxy_pool/example/izmoney_foreign_high.site
- lib/http_proxy_pool/example/izmoney_foreign_normal.site
- lib/http_proxy_pool/example/kuaidaili_inha.site
- lib/http_proxy_pool/example/kuaidaili_intr.site
- lib/http_proxy_pool/example/kuaidaili_outha.site
- lib/http_proxy_pool/example/kuaidaili_outtr.site
- lib/http_proxy_pool/example/proxy360.site
- lib/http_proxy_pool/example/proxy_goubanjia_gngn.site
- lib/http_proxy_pool/example/proxy_goubanjia_gnpt.site
- lib/http_proxy_pool/example/proxy_goubanjia_gwgn.site
- lib/http_proxy_pool/example/proxy_goubanjia_gwpt.site
- lib/http_proxy_pool/example/xicidaili_nn.site
- lib/http_proxy_pool/example/xicidaili_nt.site
- lib/http_proxy_pool/example/xicidaili_qq.site
- lib/http_proxy_pool/example/xicidaili_wn.site
- lib/http_proxy_pool/example/xicidaili_wt.site
- lib/http_proxy_pool/proxy.rb
- lib/http_proxy_pool/proxy_pool.rb
- lib/http_proxy_pool/utils.rb
- lib/http_proxy_pool/version.rb
- lib/http_proxy_pool.rb
- Rakefile
- README.md
- !binary |-
  YmluL3Byb3h5cG9vbA==
homepage: https://github.com/jiyaping/http-proxy-pool
licenses:
- MIT
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 1.8.16
signing_key:
specification_version: 3
summary: http proxy crawling from web
test_files: []