http_crawler 0.3.0.1 → 0.3.0.2

This diff shows the changes between publicly available package versions released to one of the supported registries, as those versions appear in their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d3f0a5acf0654b1ccba309c40a7153866850ce8f5be508e06cf587f23cf8a1d
4
- data.tar.gz: 38ca43bafdecec27eb57078de5b07dcc07a0d30b75807a44895eb7441c0860ff
3
+ metadata.gz: e47fc7ceac8e7335c7d873104a8ca7f504885af1c19a0802d23c1986d4ae5588
4
+ data.tar.gz: 392e793eae03814c1f3475e7515124d51b8adcdffdec9065873a90c800765225
5
5
  SHA512:
6
- metadata.gz: 98d2057ce312a8beef8508ef411f2e1d3b7f65c0588c19f3dffc1a71f1c85a259e812e8f7fb48b66f0b25120a012eccafaba096e4d1ca1647e3e7fdb790fd6d8
7
- data.tar.gz: 6f8b27afbf39767e1484ff62247e007fb666a8d49d6893968be85a93925b8b2f7207ef06e7843902d08d270af3041320f9b70e4f38248108841c4d1be3359b6c
6
+ metadata.gz: fb7ba4091d7320d1fcbb3926edb060fd55155156c34cf42b7ea1b67e1b8eba3c0cdf317a2f53d8094dee3672a17058dd57f688da6a89b4f86cfcdedad5bda42f
7
+ data.tar.gz: cd6001c16fbbff9023fe26c739fe270c62176849d3a4809d7bfa1aff4dd74856b6a8db95297c312d4fe56334dc7c8f04772d6eeb8a97f8d2de9a9df841c8a2ab
data/lib/http_crawler.rb CHANGED
@@ -3,6 +3,7 @@ require 'json'
3
3
  require 'digest/md5'
4
4
  require 'nokogiri'
5
5
 
6
+ require 'http_crawler/errors.rb'
6
7
  load 'http_crawler/common.rb'
7
8
  load 'http_crawler/client.rb'
8
9
  load 'http_crawler/web.rb'
@@ -232,21 +232,27 @@ module HttpCrawler
232
232
  n = max_error_num
233
233
  begin
234
234
  block.call
235
- rescue HTTP::TimeoutError
236
- # 超时错误切换代理
237
- if self.update_proxy?
238
- retry
239
- else
240
- raise error
241
- end
242
235
  rescue => error
243
- # 错误尝试次数
244
- if n <= 0
245
- raise error
236
+
237
+ case error
238
+ when HTTP::TimeoutError
239
+ # 超时错误切换代理
240
+ if self.update_proxy?
241
+ retry
242
+ else
243
+ raise error
244
+ end
245
+
246
246
  else
247
- n -= 1
248
- retry
247
+ # 错误尝试次数
248
+ if n <= 0
249
+ raise error
250
+ else
251
+ n -= 1
252
+ retry
253
+ end
249
254
  end
255
+
250
256
  end
251
257
  end
252
258
  end
@@ -0,0 +1,9 @@
1
+
2
+ module HttpCrawler
3
+ # 通用的错误类型
4
+ class Error < StandardError; end
5
+
6
+ # 验证码错误
7
+ class VerificationError < Error; end
8
+
9
+ end
@@ -60,9 +60,10 @@ module HTTP
60
60
  def validation_page?
61
61
  # 正则匹配数组 validations 的所有匹配值
62
62
  validations.each do |regular|
63
- if decoding_body[regular]
63
+ regular_num = decoding_body =~ regular
64
+ if regular_num
64
65
  Rails.logger.warn("触发验证信息")
65
- Rails.logger.warn(decoding_body[(decoding_body =~ regular)..100])
66
+ Rails.logger.warn(decoding_body[regular_num..(regular_num + 100)])
66
67
  return true
67
68
  end
68
69
  end
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.0.1"
2
+ VERSION = "0.3.0.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0.1
4
+ version: 0.3.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-02-17 00:00:00.000000000 Z
11
+ date: 2019-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -136,6 +136,7 @@ files:
136
136
  - lib/http_crawler/common.rb
137
137
  - lib/http_crawler/common/object.rb
138
138
  - lib/http_crawler/common/string.rb
139
+ - lib/http_crawler/errors.rb
139
140
  - lib/http_crawler/http.rb
140
141
  - lib/http_crawler/http/response.rb
141
142
  - lib/http_crawler/proxy.rb