http_crawler 0.3.1.29 → 0.3.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 329e17b63dd792550707d01ce3a310a8bc81d2b04c5d2a6b5efc983a1b0a1eff
4
- data.tar.gz: 1a0d14ccb1326181e9c929de5ce1f8e057c698c3b10ee88eab1a9685b6bf0c39
3
+ metadata.gz: 40f6c6a9c5d3225fe8b8c293e05306e8d1776c3648f82b4f7f8387310a55ec2d
4
+ data.tar.gz: ed8a2164e15d5d9f74672434cda4ee93975bfd8f80cb65b295451509bf25a6f9
5
5
  SHA512:
6
- metadata.gz: 88a8389db13b3ed17a82772d9a66378c6184133636d53617e5550d6cdd27cd542c3a5ca1320faf94c3b1a2a20441b119e1a9b915ecd268802e0a993711149fcc
7
- data.tar.gz: 8ac49deb0e1b0c3a813f2648d7d9a55e6135b883d6f9c73c7d962c37f5af364d79284c58e5069555bef433c84e4e91d71904ca3b695aa28daef2e503ee23625f
6
+ metadata.gz: 345b32732cb544585a3a12c7c6107ab8011c64ea1938043c82c74b66a0f2a12c96c5911c1533e6819c6d6809ddc73c95801c94faaf6f4dea0ddcf0fa58ab2594
7
+ data.tar.gz: e250dad59f16ae47d3866b0b5b9b9254d1441e2ce0c30fe8f689e83f1c07136b364f9a8c84b8e257c942f00c58400bdb5283aed99695036a22e0f9604a04194f
@@ -9,8 +9,8 @@ module HttpCrawler
9
9
  # web_name = "biquge_duquanben"
10
10
  # 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
11
11
  #
12
- def for(web_name)
13
- "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new()
12
+ def for(web_name, args = {})
13
+ "HttpCrawler::Web::#{web_name.camelize}::Client".constantize.new(args)
14
14
  end
15
15
 
16
16
  #
@@ -18,8 +18,8 @@ module HttpCrawler
18
18
  # module_name = "HttpCrawler::Web::BiqugeDuquanben"
19
19
  # 返回 HttpCrawler::Web::BiqugeDuquanben::Client 实例
20
20
  #
21
- def for_module(module_name, *args)
22
- "#{module_name}::Client".constantize.new()
21
+ def for_module(module_name, args = {})
22
+ "#{module_name}::Client".constantize.new(args)
23
23
  end
24
24
 
25
25
  def for_uri(path)
@@ -54,6 +54,7 @@ module HttpCrawler
54
54
  # 初始化一些 client 自定义参数
55
55
  init_client
56
56
 
57
+ self.redirect = true
57
58
  # 初始化 代理参数
58
59
  @proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
59
60
  end
@@ -109,6 +110,8 @@ module HttpCrawler
109
110
  end
110
111
  end
111
112
 
113
+ attr_accessor :redirect
114
+
112
115
  attr_accessor :header
113
116
  # 头文件相关方法
114
117
  def header(parameter = {})
@@ -147,6 +150,11 @@ module HttpCrawler
147
150
 
148
151
  def update_cookies(parameter = {})
149
152
  parameter = parameter.symbolize_keys
153
+
154
+ @response.cookies.each do |cookie|
155
+ @cookies.add(cookie)
156
+ end unless @response.blank?
157
+
150
158
  nil
151
159
  end
152
160
 
@@ -244,10 +252,19 @@ module HttpCrawler
244
252
  nil
245
253
  end
246
254
 
247
- # 初始化http请求前置条件
248
- def http
255
+ # 创建时间: 2019/9/11 17:11
256
+ # 更新时间: 2019/9/11
257
+ # 作者: Jagger
258
+ # 方法名称: init_http
259
+ # 方法说明: 初始化http请求前置条件
260
+ # 调用方式: #init_http
261
+ #
262
+ # @return HTTP
263
+ #
264
+ def init_http
265
+ h = HTTP
249
266
  # 自动重定向。最大重定向次数 max_hops: 5
250
- h = HTTP.follow(max_hops: 5)
267
+ h = h.follow(max_hops: 5) if self.redirect == true
251
268
 
252
269
  # 添加代理
253
270
  h = h.via(@proxy[:p_addr], @proxy[:p_port].to_i, @proxy[:p_user], @proxy[:p_pass]) unless (@proxy.blank?)
@@ -270,6 +287,11 @@ module HttpCrawler
270
287
  h
271
288
  end
272
289
 
290
+ # 初始化http请求前置条件
291
+ def http
292
+ init_http
293
+ end
294
+
273
295
 
274
296
  # 发送 get 请求
275
297
  def get(path, params = {}, limit = 3)
@@ -322,7 +344,7 @@ module HttpCrawler
322
344
  n = max_error_num
323
345
  begin
324
346
  r = block.call
325
- if r.status.success?
347
+ if r.status.success? || (redirect == false && r.status.redirect?)
326
348
  return r
327
349
  else
328
350
  raise "请求失败(#{r.code}):#{r.uri.to_s}"
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.29"
2
+ VERSION = "0.3.1.30"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.29
4
+ version: 0.3.1.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-19 00:00:00.000000000 Z
11
+ date: 2019-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec