http_crawler 0.3.0.1 → 0.3.0.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3d3f0a5acf0654b1ccba309c40a7153866850ce8f5be508e06cf587f23cf8a1d
-  data.tar.gz: 38ca43bafdecec27eb57078de5b07dcc07a0d30b75807a44895eb7441c0860ff
+  metadata.gz: e47fc7ceac8e7335c7d873104a8ca7f504885af1c19a0802d23c1986d4ae5588
+  data.tar.gz: 392e793eae03814c1f3475e7515124d51b8adcdffdec9065873a90c800765225
 SHA512:
-  metadata.gz: 98d2057ce312a8beef8508ef411f2e1d3b7f65c0588c19f3dffc1a71f1c85a259e812e8f7fb48b66f0b25120a012eccafaba096e4d1ca1647e3e7fdb790fd6d8
-  data.tar.gz: 6f8b27afbf39767e1484ff62247e007fb666a8d49d6893968be85a93925b8b2f7207ef06e7843902d08d270af3041320f9b70e4f38248108841c4d1be3359b6c
+  metadata.gz: fb7ba4091d7320d1fcbb3926edb060fd55155156c34cf42b7ea1b67e1b8eba3c0cdf317a2f53d8094dee3672a17058dd57f688da6a89b4f86cfcdedad5bda42f
+  data.tar.gz: cd6001c16fbbff9023fe26c739fe270c62176849d3a4809d7bfa1aff4dd74856b6a8db95297c312d4fe56334dc7c8f04772d6eeb8a97f8d2de9a9df841c8a2ab
data/lib/http_crawler.rb CHANGED
@@ -3,6 +3,7 @@ require 'json'
 require 'digest/md5'
 require 'nokogiri'
 
+require 'http_crawler/errors.rb'
 load 'http_crawler/common.rb'
 load 'http_crawler/client.rb'
 load 'http_crawler/web.rb'
@@ -232,21 +232,27 @@ module HttpCrawler
     n = max_error_num
     begin
       block.call
-    rescue HTTP::TimeoutError
-      # switch the proxy on a timeout error
-      if self.update_proxy?
-        retry
-      else
-        raise error
-      end
     rescue => error
-      # number of remaining error attempts
-      if n <= 0
-        raise error
+
+      case error
+      when HTTP::TimeoutError
+        # switch the proxy on a timeout error
+        if self.update_proxy?
+          retry
+        else
+          raise error
+        end
+
       else
-        n -= 1
-        retry
+        # number of remaining error attempts
+        if n <= 0
+          raise error
+        else
+          n -= 1
+          retry
+        end
       end
+
     end
   end
 end
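
The refactored rescue funnels every exception through one case statement: a timeout swaps the proxy and retries immediately, while any other error spends one of the max_error_num attempts before being re-raised. Below is a minimal standalone sketch of that control flow, assuming the http gem for HTTP::TimeoutError; the RetryingFetcher class, with_retries and switch_proxy! names are illustrative stand-ins, not part of the gem.

    require 'http'

    # Illustrative retry wrapper mirroring the strategy in the diff above.
    # `RetryingFetcher` and `switch_proxy!` are hypothetical stand-ins for
    # the gem's client and its update_proxy? method.
    class RetryingFetcher
      def initialize(max_error_num: 10)
        @max_error_num = max_error_num
      end

      # Runs the block; timeouts trigger a proxy switch and an immediate
      # retry, other errors are retried at most @max_error_num times.
      def with_retries(&block)
        n = @max_error_num
        begin
          block.call
        rescue => error
          case error
          when HTTP::TimeoutError
            # treat a timeout as a dead proxy: switch and try again
            raise error unless switch_proxy!
            retry
          else
            # every other error consumes one attempt from the budget
            raise error if n <= 0
            n -= 1
            retry
          end
        end
      end

      private

      # Stand-in for update_proxy?; a real client would rotate proxies here.
      def switch_proxy!
        true
      end
    end

As in the diff, a timeout only stops being retried once the proxy switch itself fails (update_proxy? returning false); non-timeout errors are bounded by the attempt counter.
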
data/lib/http_crawler/errors.rb ADDED
@@ -0,0 +1,9 @@
+
+module HttpCrawler
+  # generic error type for the gem
+  class Error < StandardError; end
+
+  # verification-code (captcha) error
+  class VerificationError < Error; end
+
+end
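
The new errors.rb gives callers a shared HttpCrawler::Error ancestor plus a captcha-specific HttpCrawler::VerificationError. A short usage sketch follows, assuming the gem is installed so that require 'http_crawler/errors' resolves to this file; the FakeResponse struct and check! helper are made up for illustration.

    require 'http_crawler/errors'

    # Hypothetical stand-in for a crawled response; only the
    # validation_page? predicate matters for this example.
    FakeResponse = Struct.new(:captcha) do
      def validation_page?
        captcha
      end
    end

    # Raise the gem's VerificationError when a captcha page comes back.
    def check!(response)
      raise HttpCrawler::VerificationError, 'captcha page returned' if response.validation_page?
      response
    end

    begin
      check!(FakeResponse.new(true))
    rescue HttpCrawler::VerificationError => e
      # captcha-specific handling, e.g. rotate the proxy and re-queue the URL
      warn "verification required: #{e.message}"
    rescue HttpCrawler::Error => e
      # anything else defined by the gem falls through to the shared ancestor
      warn "crawler error: #{e.message}"
    end

Because VerificationError inherits from Error, code that only cares about "some crawler problem" can rescue HttpCrawler::Error alone.
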
@@ -60,9 +60,10 @@ module HTTP
     def validation_page?
       # check decoding_body against every pattern in the validations array
       validations.each do |regular|
-        if decoding_body[regular]
+        regular_num = decoding_body =~ regular
+        if regular_num
           Rails.logger.warn("verification message triggered")
-          Rails.logger.warn(decoding_body[(decoding_body =~ regular)..100])
+          Rails.logger.warn(decoding_body[regular_num..(regular_num + 100)])
           return true
         end
       end
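
The old log line sliced decoding_body[(decoding_body =~ regular)..100], i.e. from the match offset up to absolute index 100, which both re-runs the regex and yields an empty snippet whenever the match starts past position 100. The new code keeps the offset in regular_num and logs a fixed-size window starting at the match. A small plain-Ruby illustration of the difference (the sample body and pattern are made up):

    body    = ('x' * 200) + 'please enter the verification code' + ('y' * 200)
    pattern = /verification code/

    offset = body =~ pattern   # => 217, the match starts well past index 100

    # Old slice: offset..100 is a backwards range here, so the snippet is empty.
    old_snippet = body[offset..100]
    # New slice: a ~100-character window starting at the match offset.
    new_snippet = body[offset..(offset + 100)]

    p old_snippet   # => ""
    p new_snippet   # => "verification code" followed by the padding after it
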
data/lib/http_crawler/version.rb CHANGED
@@ -1,3 +1,3 @@
 module HttpCrawler
-  VERSION = "0.3.0.1"
+  VERSION = "0.3.0.2"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: http_crawler
 version: !ruby/object:Gem::Version
-  version: 0.3.0.1
+  version: 0.3.0.2
 platform: ruby
 authors:
 - jagger
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-02-17 00:00:00.000000000 Z
+date: 2019-02-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -136,6 +136,7 @@ files:
 - lib/http_crawler/common.rb
 - lib/http_crawler/common/object.rb
 - lib/http_crawler/common/string.rb
+- lib/http_crawler/errors.rb
 - lib/http_crawler/http.rb
 - lib/http_crawler/http/response.rb
 - lib/http_crawler/proxy.rb