http_crawler 0.3.1.29 → 0.3.1.30

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 329e17b63dd792550707d01ce3a310a8bc81d2b04c5d2a6b5efc983a1b0a1eff
4
- data.tar.gz: 1a0d14ccb1326181e9c929de5ce1f8e057c698c3b10ee88eab1a9685b6bf0c39
3
+ metadata.gz: 40f6c6a9c5d3225fe8b8c293e05306e8d1776c3648f82b4f7f8387310a55ec2d
4
+ data.tar.gz: ed8a2164e15d5d9f74672434cda4ee93975bfd8f80cb65b295451509bf25a6f9
5
5
  SHA512:
6
- metadata.gz: 88a8389db13b3ed17a82772d9a66378c6184133636d53617e5550d6cdd27cd542c3a5ca1320faf94c3b1a2a20441b119e1a9b915ecd268802e0a993711149fcc
7
- data.tar.gz: 8ac49deb0e1b0c3a813f2648d7d9a55e6135b883d6f9c73c7d962c37f5af364d79284c58e5069555bef433c84e4e91d71904ca3b695aa28daef2e503ee23625f
6
+ metadata.gz: 345b32732cb544585a3a12c7c6107ab8011c64ea1938043c82c74b66a0f2a12c96c5911c1533e6819c6d6809ddc73c95801c94faaf6f4dea0ddcf0fa58ab2594
7
+ data.tar.gz: e250dad59f16ae47d3866b0b5b9b9254d1441e2ce0c30fe8f689e83f1c07136b364f9a8c84b8e257c942f00c58400bdb5283aed99695036a22e0f9604a04194f
@@ -9,8 +9,8 @@ module HttpCrawler
9
9
  # web_name = "biquge_duquanben"
10
10
  # 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
11
11
  #
12
- def for(web_name)
13
- "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new()
12
+ def for(web_name, args = {})
13
+ "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new(args)
14
14
  end
15
15
 
16
16
  #
@@ -18,8 +18,8 @@ module HttpCrawler
18
18
  # module_name = "HttpCrawler::Web::BiqugeDuquanben"
19
19
  # 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
20
20
  #
21
- def for_module(module_name, *args)
22
- "#{module_name}::Client".constantize.new()
21
+ def for_module(module_name, args = {})
22
+ "#{module_name}::Client".constantize.new(args)
23
23
  end
24
24
 
25
25
  def for_uri(path)
@@ -54,6 +54,7 @@ module HttpCrawler
54
54
  # 初始化一些 client 自定义参数
55
55
  init_client
56
56
 
57
+ self.redirect = true
57
58
  # 初始化 代理参数
58
59
  @proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
59
60
  end
@@ -109,6 +110,8 @@ module HttpCrawler
109
110
  end
110
111
  end
111
112
 
113
+ attr_accessor :redirect
114
+
112
115
  attr_accessor :header
113
116
  # 头文件相关方法
114
117
  def header(parameter = {})
@@ -147,6 +150,11 @@ module HttpCrawler
147
150
 
148
151
  def update_cookies(parameter = {})
149
152
  parameter = parameter.symbolize_keys
153
+
154
+ @response.cookies.each do |cookie|
155
+ @cookies.add(cookie)
156
+ end unless @response.blank?
157
+
150
158
  nil
151
159
  end
152
160
 
@@ -244,10 +252,19 @@ module HttpCrawler
244
252
  nil
245
253
  end
246
254
 
247
- # 初始化http请求前置条件
248
- def http
255
+ # 创建时间: 2019/9/11 17:11
256
+ # 更新时间: 2019/9/11
257
+ # 作者: Jagger
258
+ # 方法名称: init_http
259
+ # 方法说明: 初始化http请求前置条件
260
+ # 调用方式: #init_http
261
+ #
262
+ # @return HTTP
263
+ #
264
+ def init_http
265
+ h = HTTP
249
266
  # 自动重定向。最大重定向次数 max_hops: 5
250
- h = HTTP.follow(max_hops: 5)
267
+ h = h.follow(max_hops: 5) if self.redirect == true
251
268
 
252
269
  # 添加代理
253
270
  h = h.via(@proxy[:p_addr], @proxy[:p_port].to_i, @proxy[:p_user], @proxy[:p_pass]) unless (@proxy.blank?)
@@ -270,6 +287,11 @@ module HttpCrawler
270
287
  h
271
288
  end
272
289
 
290
+ # 初始化http请求前置条件
291
+ def http
292
+ init_http
293
+ end
294
+
273
295
 
274
296
  # 发送 get 请求
275
297
  def get(path, params = {}, limit = 3)
@@ -322,7 +344,7 @@ module HttpCrawler
322
344
  n = max_error_num
323
345
  begin
324
346
  r = block.call
325
- if r.status.success?
347
+ if r.status.success? || (redirect == false && r.status.redirect?)
326
348
  return r
327
349
  else
328
350
  raise "请求失败(#{r.code}):#{r.uri.to_s}"
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.29"
2
+ VERSION = "0.3.1.30"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.29
4
+ version: 0.3.1.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-19 00:00:00.000000000 Z
11
+ date: 2019-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec