http_crawler 0.3.0.1 → 0.3.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/http_crawler.rb +1 -0
- data/lib/http_crawler/client.rb +18 -12
- data/lib/http_crawler/errors.rb +9 -0
- data/lib/http_crawler/http/response.rb +3 -2
- data/lib/http_crawler/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e47fc7ceac8e7335c7d873104a8ca7f504885af1c19a0802d23c1986d4ae5588
|
4
|
+
data.tar.gz: 392e793eae03814c1f3475e7515124d51b8adcdffdec9065873a90c800765225
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb7ba4091d7320d1fcbb3926edb060fd55155156c34cf42b7ea1b67e1b8eba3c0cdf317a2f53d8094dee3672a17058dd57f688da6a89b4f86cfcdedad5bda42f
|
7
|
+
data.tar.gz: cd6001c16fbbff9023fe26c739fe270c62176849d3a4809d7bfa1aff4dd74856b6a8db95297c312d4fe56334dc7c8f04772d6eeb8a97f8d2de9a9df841c8a2ab
|
data/lib/http_crawler.rb
CHANGED
data/lib/http_crawler/client.rb
CHANGED
@@ -232,21 +232,27 @@ module HttpCrawler
|
|
232
232
|
n = max_error_num
|
233
233
|
begin
|
234
234
|
block.call
|
235
|
-
rescue HTTP::TimeoutError
|
236
|
-
# 超时错误切换代理
|
237
|
-
if self.update_proxy?
|
238
|
-
retry
|
239
|
-
else
|
240
|
-
raise error
|
241
|
-
end
|
242
235
|
rescue => error
|
243
|
-
|
244
|
-
|
245
|
-
|
236
|
+
|
237
|
+
case error
|
238
|
+
when HTTP::TimeoutError
|
239
|
+
# 超时错误切换代理
|
240
|
+
if self.update_proxy?
|
241
|
+
retry
|
242
|
+
else
|
243
|
+
raise error
|
244
|
+
end
|
245
|
+
|
246
246
|
else
|
247
|
-
|
248
|
-
|
247
|
+
# 错误尝试次数
|
248
|
+
if n <= 0
|
249
|
+
raise error
|
250
|
+
else
|
251
|
+
n -= 1
|
252
|
+
retry
|
253
|
+
end
|
249
254
|
end
|
255
|
+
|
250
256
|
end
|
251
257
|
end
|
252
258
|
end
|
@@ -60,9 +60,10 @@ module HTTP
|
|
60
60
|
def validation_page?
|
61
61
|
# 正则匹配数组 validations 的所有匹配值
|
62
62
|
validations.each do |regular|
|
63
|
-
|
63
|
+
regular_num = decoding_body =~ regular
|
64
|
+
if regular_num
|
64
65
|
Rails.logger.warn("触发验证信息")
|
65
|
-
Rails.logger.warn(decoding_body[(
|
66
|
+
Rails.logger.warn(decoding_body[regular_num..(regular_num + 100)])
|
66
67
|
return true
|
67
68
|
end
|
68
69
|
end
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0.1
|
4
|
+
version: 0.3.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-02-
|
11
|
+
date: 2019-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -136,6 +136,7 @@ files:
|
|
136
136
|
- lib/http_crawler/common.rb
|
137
137
|
- lib/http_crawler/common/object.rb
|
138
138
|
- lib/http_crawler/common/string.rb
|
139
|
+
- lib/http_crawler/errors.rb
|
139
140
|
- lib/http_crawler/http.rb
|
140
141
|
- lib/http_crawler/http/response.rb
|
141
142
|
- lib/http_crawler/proxy.rb
|