http_crawler 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/http_crawler/client.rb +1 -1
- data/lib/http_crawler/http.rb +2 -2
- data/lib/http_crawler/proxy/test_proxy_api/client.rb +4 -4
- data/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb +1 -1
- data/lib/http_crawler/proxy.rb +3 -3
- data/lib/http_crawler/version.rb +1 -1
- data/lib/http_crawler/web/baidu/client.rb +2 -2
- data/lib/http_crawler/web/baidu/response/index.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51fa116a088c7a5d065b9dc4ed70185f85102ed6
|
4
|
+
data.tar.gz: 711b59b48c0c782d24ffed5d76d88c82e011d17b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff1e9c6f6b7734ffadfd5a0c33a1412b4542445a9d86840847885269b36a764faa69349c313d8ed085493dca8544a77609577f595cad884564bca4164064d991
|
7
|
+
data.tar.gz: 94dc18a2b253e9a2fda57b51067b191d49ea88df04128c3f7bfb88745c99afe33d9dc64bdde7fb79f7b58a3cab2b2cbf08d6bf2d298b316e488cc439bb6481e3
|
data/lib/http_crawler/client.rb
CHANGED
data/lib/http_crawler/http.rb
CHANGED
@@ -6,7 +6,7 @@ module HttpCrawler
|
|
6
6
|
|
7
7
|
# 自动获取代理,true 表示自动获取代理 、false 表示不自动获取
|
8
8
|
attr_accessor :auto_proxy
|
9
|
-
# 代理API的别名 主要关联
|
9
|
+
# 代理API的别名 主要关联 HttpCrawler::Proxy中维护的代理API
|
10
10
|
attr_accessor :proxy_api
|
11
11
|
# 调用自己的代理池所需要的主键 key
|
12
12
|
attr_accessor :proxy_key
|
@@ -58,7 +58,7 @@ module HttpCrawler
|
|
58
58
|
|
59
59
|
while @@proxy_list.blank?
|
60
60
|
Rails.logger.debug("@@proxy_list 为空进行更新")
|
61
|
-
proxy_client =
|
61
|
+
proxy_client = HttpCrawler::Proxy.for(proxy_api)
|
62
62
|
proxy_r = proxy_client.get_proxy(key: proxy_key)
|
63
63
|
@@proxy_list << proxy_r.parsing
|
64
64
|
Rails.logger.debug("@@proxy_list => #{@@proxy_list}")
|
data/lib/http_crawler/proxy/test_proxy_api/client.rb
CHANGED
@@ -3,8 +3,8 @@ module HttpCrawler
|
|
3
3
|
module TestProxyApi
|
4
4
|
class Client
|
5
5
|
|
6
|
-
include(
|
7
|
-
include(
|
6
|
+
include(HttpCrawler::Client)
|
7
|
+
include(HttpCrawler::Proxy::Client)
|
8
8
|
|
9
9
|
class << self
|
10
10
|
def new(*args)
|
@@ -19,11 +19,11 @@ module HttpCrawler
|
|
19
19
|
# http://39.108.59.38:7772/Tools/proxyIP.ashx?OrderNumber=ccd4c8912691f28861a1ed048fec88dc&poolIndex=22717&cache=1&qty=2
|
20
20
|
def get_proxy(parameter = {})
|
21
21
|
r = http.get_fetch("/api/get_proxy")
|
22
|
-
r.extend(
|
22
|
+
r.extend(HttpCrawler::Proxy::Laofu::Response::GetProxy)
|
23
23
|
end
|
24
24
|
|
25
25
|
end
|
26
26
|
end # module BiQuGe_DuQuanBen
|
27
27
|
end # module Web
|
28
|
-
end # module
|
28
|
+
end # module HttpCrawler
|
29
29
|
|
data/lib/http_crawler/proxy.rb
CHANGED
@@ -4,11 +4,11 @@ module HttpCrawler
|
|
4
4
|
class << self
|
5
5
|
|
6
6
|
# 接收格式
|
7
|
-
# web_name = "
|
8
|
-
# 返回
|
7
|
+
# web_name = "test_proxy_api"
|
8
|
+
# 返回 HttpCrawler::Proxy::TestProxyApi::Client 实例
|
9
9
|
#
|
10
10
|
def for(web_name, *arg)
|
11
|
-
"
|
11
|
+
"HttpCrawler::Proxy::#{web_name.camelize}::Client".constantize.new(*arg)
|
12
12
|
end
|
13
13
|
|
14
14
|
end
|
data/lib/http_crawler/version.rb
CHANGED
data/lib/http_crawler/web/baidu/client.rb
CHANGED
@@ -2,7 +2,7 @@ module HttpCrawler
|
|
2
2
|
module Web
|
3
3
|
module Baidu
|
4
4
|
class Client
|
5
|
-
include(
|
5
|
+
include(HttpCrawler::Client)
|
6
6
|
|
7
7
|
def init_http
|
8
8
|
@http.open_timeout = 3
|
@@ -21,5 +21,5 @@ module HttpCrawler
|
|
21
21
|
end
|
22
22
|
end # module Baidu
|
23
23
|
end # module Web
|
24
|
-
end # module
|
24
|
+
end # module HttpCrawler
|
25
25
|
|