http_crawler 0.3.1.25 → 0.3.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/http_crawler/client.rb +13 -13
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7accd1bd33503aeafee43ad0a4c3be71ea114ca77fa3371b3ad22cec58e4398
|
4
|
+
data.tar.gz: 0c3e475dbeefcdaea852c4a7e959c48ed89e474d6ffcbf83184e64c420794e95
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e66c42a281aaf97e3ce2b0bcd84546e05f226791f859ccac9d16853f6ded5c465ebfb3eac9560b6a5a86311f57dea9f741177a92d38f0fefff8375b0d2e3598
|
7
|
+
data.tar.gz: 92cf78fd9681ad8420c897ed09f317a3f1585e8edae06d1d0102da18062bd35217e8b528fe83360b2e408ffbcc224b9888f1931d7cd39c926407d35280be11d4
|
data/lib/http_crawler/client.rb
CHANGED
@@ -206,6 +206,7 @@ module HttpCrawler
|
|
206
206
|
proxy_client = HttpCrawler::Proxy.for(proxy_api)
|
207
207
|
proxy_r = proxy_client.get_proxy(proxy_params.symbolize_keys)
|
208
208
|
proxy_ip = proxy_r.results unless proxy_r.results.blank?
|
209
|
+
proxy_ip = {p_addr: "127.0.0.1", p_port: 8888}
|
209
210
|
if proxy_ip.blank?
|
210
211
|
Rails.logger.warn "无最新代理等待5秒后重新获取:proxy 为空"
|
211
212
|
else
|
@@ -327,22 +328,21 @@ module HttpCrawler
|
|
327
328
|
end
|
328
329
|
rescue => error
|
329
330
|
Rails.logger.debug error.class
|
330
|
-
|
331
|
-
|
332
|
-
#
|
333
|
-
|
334
|
-
retry
|
335
|
-
else
|
336
|
-
raise error
|
337
|
-
end
|
331
|
+
# 错误尝试次数
|
332
|
+
if n <= 0
|
333
|
+
# 错误尝试次数小于等于0就结束尝试
|
334
|
+
raise error
|
338
335
|
else
|
339
|
-
#
|
340
|
-
|
341
|
-
|
336
|
+
# 每次错误次数尝试 -1
|
337
|
+
n -= 1
|
338
|
+
case error
|
339
|
+
when HTTP::TimeoutError
|
340
|
+
# 超时错误切换代理
|
341
|
+
raise error unless self.update_proxy?
|
342
342
|
else
|
343
|
-
|
344
|
-
retry
|
343
|
+
raise error unless self.update_proxy?
|
345
344
|
end
|
345
|
+
retry
|
346
346
|
end
|
347
347
|
end
|
348
348
|
end # def request(&block)
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.25
|
4
|
+
version: 0.3.1.26
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|