http_crawler 0.3.1.29 → 0.3.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/http_crawler/client.rb +30 -8
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40f6c6a9c5d3225fe8b8c293e05306e8d1776c3648f82b4f7f8387310a55ec2d
|
4
|
+
data.tar.gz: ed8a2164e15d5d9f74672434cda4ee93975bfd8f80cb65b295451509bf25a6f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 345b32732cb544585a3a12c7c6107ab8011c64ea1938043c82c74b66a0f2a12c96c5911c1533e6819c6d6809ddc73c95801c94faaf6f4dea0ddcf0fa58ab2594
|
7
|
+
data.tar.gz: e250dad59f16ae47d3866b0b5b9b9254d1441e2ce0c30fe8f689e83f1c07136b364f9a8c84b8e257c942f00c58400bdb5283aed99695036a22e0f9604a04194f
|
data/lib/http_crawler/client.rb
CHANGED
@@ -9,8 +9,8 @@ module HttpCrawler
|
|
9
9
|
# web_name = "biquge_duquanben"
|
10
10
|
# 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
|
11
11
|
#
|
12
|
-
def for(web_name)
|
13
|
-
"HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new()
|
12
|
+
def for(web_name, args = {})
|
13
|
+
"HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new(args)
|
14
14
|
end
|
15
15
|
|
16
16
|
#
|
@@ -18,8 +18,8 @@ module HttpCrawler
|
|
18
18
|
# module_name = "HttpCrawler::Web::BiqugeDuquanben"
|
19
19
|
# 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
|
20
20
|
#
|
21
|
-
def for_module(module_name,
|
22
|
-
"#{module_name}::Client".constantize.new()
|
21
|
+
def for_module(module_name, args = {})
|
22
|
+
"#{module_name}::Client".constantize.new(args)
|
23
23
|
end
|
24
24
|
|
25
25
|
def for_uri(path)
|
@@ -54,6 +54,7 @@ module HttpCrawler
|
|
54
54
|
# 初始化一些 client 自定义参数
|
55
55
|
init_client
|
56
56
|
|
57
|
+
self.redirect = true
|
57
58
|
# 初始化 代理参数
|
58
59
|
@proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
|
59
60
|
end
|
@@ -109,6 +110,8 @@ module HttpCrawler
|
|
109
110
|
end
|
110
111
|
end
|
111
112
|
|
113
|
+
attr_accessor :redirect
|
114
|
+
|
112
115
|
attr_accessor :header
|
113
116
|
# 头文件相关方法
|
114
117
|
def header(parameter = {})
|
@@ -147,6 +150,11 @@ module HttpCrawler
|
|
147
150
|
|
148
151
|
def update_cookies(parameter = {})
|
149
152
|
parameter = parameter.symbolize_keys
|
153
|
+
|
154
|
+
@response.cookies.each do |cookie|
|
155
|
+
@cookies.add(cookie)
|
156
|
+
end unless @response.blank?
|
157
|
+
|
150
158
|
nil
|
151
159
|
end
|
152
160
|
|
@@ -244,10 +252,19 @@ module HttpCrawler
|
|
244
252
|
nil
|
245
253
|
end
|
246
254
|
|
247
|
-
#
|
248
|
-
|
255
|
+
# 创建时间: 2019/9/11 17:11
|
256
|
+
# 更新时间: 2019/9/11
|
257
|
+
# 作者: Jagger
|
258
|
+
# 方法名称: init_http
|
259
|
+
# 方法说明: 初始化http请求前置条件
|
260
|
+
# 调用方式: #init_http
|
261
|
+
#
|
262
|
+
# @return HTTP
|
263
|
+
#
|
264
|
+
def init_http
|
265
|
+
h = HTTP
|
249
266
|
# 自动重定向。最大重定向次数 max_hops: 5
|
250
|
-
h =
|
267
|
+
h = h.follow(max_hops: 5) if self.redirect == true
|
251
268
|
|
252
269
|
# 添加代理
|
253
270
|
h = h.via(@proxy[:p_addr], @proxy[:p_port].to_i, @proxy[:p_user], @proxy[:p_pass]) unless (@proxy.blank?)
|
@@ -270,6 +287,11 @@ module HttpCrawler
|
|
270
287
|
h
|
271
288
|
end
|
272
289
|
|
290
|
+
# 初始化http请求前置条件
|
291
|
+
def http
|
292
|
+
init_http
|
293
|
+
end
|
294
|
+
|
273
295
|
|
274
296
|
# 发送 get 请求
|
275
297
|
def get(path, params = {}, limit = 3)
|
@@ -322,7 +344,7 @@ module HttpCrawler
|
|
322
344
|
n = max_error_num
|
323
345
|
begin
|
324
346
|
r = block.call
|
325
|
-
if r.status.success?
|
347
|
+
if r.status.success? || (redirect == false && r.status.redirect?)
|
326
348
|
return r
|
327
349
|
else
|
328
350
|
raise "请求失败(#{r.code}):#{r.uri.to_s}"
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.29
|
4
|
+
version: 0.3.1.30
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|