http_crawler 0.3.0.1 → 0.3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/http_crawler.rb +1 -0
- data/lib/http_crawler/client.rb +18 -12
- data/lib/http_crawler/errors.rb +9 -0
- data/lib/http_crawler/http/response.rb +3 -2
- data/lib/http_crawler/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e47fc7ceac8e7335c7d873104a8ca7f504885af1c19a0802d23c1986d4ae5588
|
4
|
+
data.tar.gz: 392e793eae03814c1f3475e7515124d51b8adcdffdec9065873a90c800765225
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb7ba4091d7320d1fcbb3926edb060fd55155156c34cf42b7ea1b67e1b8eba3c0cdf317a2f53d8094dee3672a17058dd57f688da6a89b4f86cfcdedad5bda42f
|
7
|
+
data.tar.gz: cd6001c16fbbff9023fe26c739fe270c62176849d3a4809d7bfa1aff4dd74856b6a8db95297c312d4fe56334dc7c8f04772d6eeb8a97f8d2de9a9df841c8a2ab
|
data/lib/http_crawler.rb
CHANGED
data/lib/http_crawler/client.rb
CHANGED
@@ -232,21 +232,27 @@ module HttpCrawler
|
|
232
232
|
n = max_error_num
|
233
233
|
begin
|
234
234
|
block.call
|
235
|
-
rescue HTTP::TimeoutError
|
236
|
-
# 超时错误切换代理
|
237
|
-
if self.update_proxy?
|
238
|
-
retry
|
239
|
-
else
|
240
|
-
raise error
|
241
|
-
end
|
242
235
|
rescue => error
|
243
|
-
|
244
|
-
|
245
|
-
|
236
|
+
|
237
|
+
case error
|
238
|
+
when HTTP::TimeoutError
|
239
|
+
# 超时错误切换代理
|
240
|
+
if self.update_proxy?
|
241
|
+
retry
|
242
|
+
else
|
243
|
+
raise error
|
244
|
+
end
|
245
|
+
|
246
246
|
else
|
247
|
-
|
248
|
-
|
247
|
+
# 错误尝试次数
|
248
|
+
if n <= 0
|
249
|
+
raise error
|
250
|
+
else
|
251
|
+
n -= 1
|
252
|
+
retry
|
253
|
+
end
|
249
254
|
end
|
255
|
+
|
250
256
|
end
|
251
257
|
end
|
252
258
|
end
|
@@ -60,9 +60,10 @@ module HTTP
|
|
60
60
|
def validation_page?
|
61
61
|
# 正则匹配数组 validations 的所有匹配值
|
62
62
|
validations.each do |regular|
|
63
|
-
|
63
|
+
regular_num = decoding_body =~ regular
|
64
|
+
if regular_num
|
64
65
|
Rails.logger.warn("触发验证信息")
|
65
|
-
Rails.logger.warn(decoding_body[(
|
66
|
+
Rails.logger.warn(decoding_body[regular_num..(regular_num + 100)])
|
66
67
|
return true
|
67
68
|
end
|
68
69
|
end
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0.1
|
4
|
+
version: 0.3.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-02-
|
11
|
+
date: 2019-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -136,6 +136,7 @@ files:
|
|
136
136
|
- lib/http_crawler/common.rb
|
137
137
|
- lib/http_crawler/common/object.rb
|
138
138
|
- lib/http_crawler/common/string.rb
|
139
|
+
- lib/http_crawler/errors.rb
|
139
140
|
- lib/http_crawler/http.rb
|
140
141
|
- lib/http_crawler/http/response.rb
|
141
142
|
- lib/http_crawler/proxy.rb
|