http_crawler 0.3.1.29 → 0.3.1.30
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/http_crawler/client.rb +30 -8
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40f6c6a9c5d3225fe8b8c293e05306e8d1776c3648f82b4f7f8387310a55ec2d
|
4
|
+
data.tar.gz: ed8a2164e15d5d9f74672434cda4ee93975bfd8f80cb65b295451509bf25a6f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 345b32732cb544585a3a12c7c6107ab8011c64ea1938043c82c74b66a0f2a12c96c5911c1533e6819c6d6809ddc73c95801c94faaf6f4dea0ddcf0fa58ab2594
|
7
|
+
data.tar.gz: e250dad59f16ae47d3866b0b5b9b9254d1441e2ce0c30fe8f689e83f1c07136b364f9a8c84b8e257c942f00c58400bdb5283aed99695036a22e0f9604a04194f
|
data/lib/http_crawler/client.rb
CHANGED
@@ -9,8 +9,8 @@ module HttpCrawler
|
|
9
9
|
# web_name = "biquge_duquanben"
|
10
10
|
# 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
|
11
11
|
#
|
12
|
-
def for(web_name)
|
13
|
-
"HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new()
|
12
|
+
def for(web_name, args = {})
|
13
|
+
"HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new(args)
|
14
14
|
end
|
15
15
|
|
16
16
|
#
|
@@ -18,8 +18,8 @@ module HttpCrawler
|
|
18
18
|
# module_name = "HttpCrawler::Web::BiqugeDuquanben"
|
19
19
|
# 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
|
20
20
|
#
|
21
|
-
def for_module(module_name,
|
22
|
-
"#{module_name}::Client".constantize.new()
|
21
|
+
def for_module(module_name, args = {})
|
22
|
+
"#{module_name}::Client".constantize.new(args)
|
23
23
|
end
|
24
24
|
|
25
25
|
def for_uri(path)
|
@@ -54,6 +54,7 @@ module HttpCrawler
|
|
54
54
|
# 初始化一些 client 自定义参数
|
55
55
|
init_client
|
56
56
|
|
57
|
+
self.redirect = true
|
57
58
|
# 初始化 代理参数
|
58
59
|
@proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
|
59
60
|
end
|
@@ -109,6 +110,8 @@ module HttpCrawler
|
|
109
110
|
end
|
110
111
|
end
|
111
112
|
|
113
|
+
attr_accessor :redirect
|
114
|
+
|
112
115
|
attr_accessor :header
|
113
116
|
# 头文件相关方法
|
114
117
|
def header(parameter = {})
|
@@ -147,6 +150,11 @@ module HttpCrawler
|
|
147
150
|
|
148
151
|
def update_cookies(parameter = {})
|
149
152
|
parameter = parameter.symbolize_keys
|
153
|
+
|
154
|
+
@response.cookies.each do |cookie|
|
155
|
+
@cookies.add(cookie)
|
156
|
+
end unless @response.blank?
|
157
|
+
|
150
158
|
nil
|
151
159
|
end
|
152
160
|
|
@@ -244,10 +252,19 @@ module HttpCrawler
|
|
244
252
|
nil
|
245
253
|
end
|
246
254
|
|
247
|
-
#
|
248
|
-
|
255
|
+
# 创建时间: 2019/9/11 17:11
|
256
|
+
# 更新时间: 2019/9/11
|
257
|
+
# 作者: Jagger
|
258
|
+
# 方法名称: init_http
|
259
|
+
# 方法说明: 初始化http请求前置条件
|
260
|
+
# 调用方式: #init_http
|
261
|
+
#
|
262
|
+
# @return HTTP
|
263
|
+
#
|
264
|
+
def init_http
|
265
|
+
h = HTTP
|
249
266
|
# 自动重定向。最大重定向次数 max_hops: 5
|
250
|
-
h = HTTP.follow(max_hops: 5)
|
267
|
+
h = h.follow(max_hops: 5) if self.redirect == true
|
251
268
|
|
252
269
|
# 添加代理
|
253
270
|
h = h.via(@proxy[:p_addr], @proxy[:p_port].to_i, @proxy[:p_user], @proxy[:p_pass]) unless (@proxy.blank?)
|
@@ -270,6 +287,11 @@ module HttpCrawler
|
|
270
287
|
h
|
271
288
|
end
|
272
289
|
|
290
|
+
# 初始化http请求前置条件
|
291
|
+
def http
|
292
|
+
init_http
|
293
|
+
end
|
294
|
+
|
273
295
|
|
274
296
|
# 发送 get 请求
|
275
297
|
def get(path, params = {}, limit = 3)
|
@@ -322,7 +344,7 @@ module HttpCrawler
|
|
322
344
|
n = max_error_num
|
323
345
|
begin
|
324
346
|
r = block.call
|
325
|
-
if r.status.success?
|
347
|
+
if r.status.success? || (redirect == false && r.status.redirect?)
|
326
348
|
return r
|
327
349
|
else
|
328
350
|
raise "请求失败(#{r.code}):#{r.uri.to_s}"
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.29
|
4
|
+
version: 0.3.1.30
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|