http_crawler 0.2.2.7 → 0.2.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/workspace.xml +61 -54
- data/lib/http_crawler/http.rb +15 -18
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4126016201a30cb18b5cd7fe089d64fd14b9757f
|
4
|
+
data.tar.gz: 57f61d877b5ecf879293d8426026dc46629c9031
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bc0225909563a93b8c7d655099ef27895deb469479c21c6827ce23448454915fdaecc5073473a363e9dae30c1aedeeb8bf6c46e6b6b17b40b582f2ec6fe2326
|
7
|
+
data.tar.gz: 74f6022b992c49b4020d50d2dd824176d4c914e46bdb788439f3a1152c4c28d642b8a795926a95ad6e31b421d0715f9f0f69e12629831447ba9ed67809de7656
|
data/.idea/workspace.xml
CHANGED
@@ -28,8 +28,8 @@
|
|
28
28
|
<file leaf-file-name="version.rb" pinned="false" current-in-tab="true">
|
29
29
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
30
30
|
<provider selected="true" editor-type-id="text-editor">
|
31
|
-
<state>
|
32
|
-
<caret
|
31
|
+
<state relative-caret-position="45">
|
32
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
33
33
|
</state>
|
34
34
|
</provider>
|
35
35
|
</entry>
|
@@ -46,35 +46,35 @@
|
|
46
46
|
<file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
|
47
47
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
48
48
|
<provider selected="true" editor-type-id="text-editor">
|
49
|
-
<state relative-caret-position="
|
50
|
-
<caret line="
|
49
|
+
<state relative-caret-position="267">
|
50
|
+
<caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
|
51
51
|
</state>
|
52
52
|
</provider>
|
53
53
|
</entry>
|
54
54
|
</file>
|
55
|
-
<file leaf-file-name="
|
56
|
-
<entry file="file://$
|
55
|
+
<file leaf-file-name="compat.rb" pinned="false" current-in-tab="false">
|
56
|
+
<entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
|
57
57
|
<provider selected="true" editor-type-id="text-editor">
|
58
|
-
<state relative-caret-position="
|
59
|
-
<caret line="
|
58
|
+
<state relative-caret-position="270">
|
59
|
+
<caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
|
60
60
|
</state>
|
61
61
|
</provider>
|
62
62
|
</entry>
|
63
63
|
</file>
|
64
|
-
<file leaf-file-name="
|
65
|
-
<entry file="file://$
|
64
|
+
<file leaf-file-name="errno.rb" pinned="false" current-in-tab="false">
|
65
|
+
<entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
|
66
66
|
<provider selected="true" editor-type-id="text-editor">
|
67
|
-
<state relative-caret-position="
|
68
|
-
<caret line="
|
67
|
+
<state relative-caret-position="480">
|
68
|
+
<caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
|
69
69
|
</state>
|
70
70
|
</provider>
|
71
71
|
</entry>
|
72
72
|
</file>
|
73
|
-
<file leaf-file-name="
|
74
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
73
|
+
<file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
|
74
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
75
75
|
<provider selected="true" editor-type-id="text-editor">
|
76
|
-
<state relative-caret-position="
|
77
|
-
<caret line="
|
76
|
+
<state relative-caret-position="30">
|
77
|
+
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
78
78
|
</state>
|
79
79
|
</provider>
|
80
80
|
</entry>
|
@@ -88,15 +88,6 @@
|
|
88
88
|
</provider>
|
89
89
|
</entry>
|
90
90
|
</file>
|
91
|
-
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
92
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
93
|
-
<provider selected="true" editor-type-id="text-editor">
|
94
|
-
<state relative-caret-position="150">
|
95
|
-
<caret line="10" column="48" selection-start-line="10" selection-start-column="48" selection-end-line="10" selection-end-column="48" />
|
96
|
-
</state>
|
97
|
-
</provider>
|
98
|
-
</entry>
|
99
|
-
</file>
|
100
91
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
101
92
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
|
102
93
|
<provider selected="true" editor-type-id="text-editor">
|
@@ -203,6 +194,21 @@
|
|
203
194
|
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
204
195
|
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
205
196
|
</path>
|
197
|
+
<path>
|
198
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
199
|
+
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
|
200
|
+
</path>
|
201
|
+
<path>
|
202
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
203
|
+
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
|
204
|
+
<item name="< RVM: ruby-2.4.1 >" type="70bed36:NamedLibraryElementNode" />
|
205
|
+
</path>
|
206
|
+
<path>
|
207
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
208
|
+
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
|
209
|
+
<item name="< RVM: ruby-2.4.1 >" type="70bed36:NamedLibraryElementNode" />
|
210
|
+
<item name="rubystubs24" type="462c0819:PsiDirectoryNode" />
|
211
|
+
</path>
|
206
212
|
</expand>
|
207
213
|
<select />
|
208
214
|
</subPane>
|
@@ -251,18 +257,19 @@
|
|
251
257
|
<workItem from="1545966041001" duration="9181000" />
|
252
258
|
<workItem from="1546164127129" duration="10301000" />
|
253
259
|
<workItem from="1546240992243" duration="719000" />
|
254
|
-
<workItem from="1546291493927" duration="
|
260
|
+
<workItem from="1546291493927" duration="464000" />
|
261
|
+
<workItem from="1546436457874" duration="826000" />
|
255
262
|
</task>
|
256
263
|
<servers />
|
257
264
|
</component>
|
258
265
|
<component name="TimeTrackingManager">
|
259
|
-
<option name="totallyTimeSpent" value="
|
266
|
+
<option name="totallyTimeSpent" value="21491000" />
|
260
267
|
</component>
|
261
268
|
<component name="ToolWindowManager">
|
262
269
|
<frame x="0" y="0" width="1680" height="1050" extended-state="6" />
|
263
270
|
<editor active="true" />
|
264
271
|
<layout>
|
265
|
-
<window_info
|
272
|
+
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.25518927" />
|
266
273
|
<window_info anchor="bottom" id="TODO" order="6" />
|
267
274
|
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
|
268
275
|
<window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
|
@@ -270,7 +277,7 @@
|
|
270
277
|
<window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
|
271
278
|
<window_info anchor="bottom" id="Version Control" order="7" />
|
272
279
|
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
273
|
-
<window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
|
280
|
+
<window_info active="true" anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
|
274
281
|
<window_info id="Favorites" order="2" side_tool="true" />
|
275
282
|
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
276
283
|
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
@@ -290,27 +297,6 @@
|
|
290
297
|
<option name="myLimit" value="2678400000" />
|
291
298
|
</component>
|
292
299
|
<component name="editorHistoryManager">
|
293
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
294
|
-
<provider selected="true" editor-type-id="text-editor">
|
295
|
-
<state relative-caret-position="3495">
|
296
|
-
<caret line="233" lean-forward="true" selection-start-line="233" selection-end-line="233" />
|
297
|
-
</state>
|
298
|
-
</provider>
|
299
|
-
</entry>
|
300
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
|
301
|
-
<provider selected="true" editor-type-id="text-editor">
|
302
|
-
<state relative-caret-position="465">
|
303
|
-
<caret line="31" column="54" selection-start-line="31" selection-start-column="54" selection-end-line="31" selection-end-column="54" />
|
304
|
-
</state>
|
305
|
-
</provider>
|
306
|
-
</entry>
|
307
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
308
|
-
<provider selected="true" editor-type-id="text-editor">
|
309
|
-
<state relative-caret-position="30">
|
310
|
-
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
311
|
-
</state>
|
312
|
-
</provider>
|
313
|
-
</entry>
|
314
300
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
315
301
|
<provider selected="true" editor-type-id="text-editor">
|
316
302
|
<state relative-caret-position="90">
|
@@ -613,17 +599,38 @@
|
|
613
599
|
</state>
|
614
600
|
</provider>
|
615
601
|
</entry>
|
602
|
+
<entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
|
603
|
+
<provider selected="true" editor-type-id="text-editor">
|
604
|
+
<state relative-caret-position="480">
|
605
|
+
<caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
|
606
|
+
</state>
|
607
|
+
</provider>
|
608
|
+
</entry>
|
616
609
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
617
610
|
<provider selected="true" editor-type-id="text-editor">
|
618
|
-
<state relative-caret-position="
|
619
|
-
<caret line="
|
611
|
+
<state relative-caret-position="267">
|
612
|
+
<caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
|
613
|
+
</state>
|
614
|
+
</provider>
|
615
|
+
</entry>
|
616
|
+
<entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/system_call_error.rb">
|
617
|
+
<provider selected="true" editor-type-id="text-editor">
|
618
|
+
<state relative-caret-position="150">
|
619
|
+
<caret line="13" column="2" lean-forward="true" selection-start-line="13" selection-start-column="2" selection-end-line="13" selection-end-column="2" />
|
620
|
+
</state>
|
621
|
+
</provider>
|
622
|
+
</entry>
|
623
|
+
<entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
|
624
|
+
<provider selected="true" editor-type-id="text-editor">
|
625
|
+
<state relative-caret-position="270">
|
626
|
+
<caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
|
620
627
|
</state>
|
621
628
|
</provider>
|
622
629
|
</entry>
|
623
630
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
624
631
|
<provider selected="true" editor-type-id="text-editor">
|
625
|
-
<state>
|
626
|
-
<caret
|
632
|
+
<state relative-caret-position="45">
|
633
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
627
634
|
</state>
|
628
635
|
</provider>
|
629
636
|
</entry>
|
data/lib/http_crawler/http.rb
CHANGED
@@ -161,7 +161,7 @@ module HttpCrawler
|
|
161
161
|
# 重新请求
|
162
162
|
post_fetch(uri_or_path, initheader, dest, &block)
|
163
163
|
when Net::HTTPProxyAuthenticationRequired then
|
164
|
-
Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{
|
164
|
+
Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{proxy_address}:#{proxy_port}] =>#{address}"
|
165
165
|
if update_proxy?
|
166
166
|
server_error_sleep
|
167
167
|
# 重新请求
|
@@ -186,40 +186,37 @@ module HttpCrawler
|
|
186
186
|
Rails.logger.debug("body => #{body}") if started? && body
|
187
187
|
super(req, body, &block)
|
188
188
|
rescue => error
|
189
|
+
Rails.logger.error "出错了! 错误类型 => #{error.class}"
|
189
190
|
if started?
|
190
191
|
# started? 是为了判断是否结束http请求,如果不添加则会处理2次异常
|
191
192
|
Rails.logger.error("#{req.class} => #{use_ssl? ? "https://" : "http://" }#{address}:#{port}#{req.path}")
|
192
193
|
Rails.logger.error("body => #{body}") if body
|
193
194
|
raise error
|
194
195
|
else
|
196
|
+
http_error_sleep
|
195
197
|
# 最大错误尝试次数
|
196
198
|
if @error_num < @max_error_num
|
197
199
|
@error_num += 1
|
198
|
-
http_error_sleep
|
199
200
|
retry # 这将把控制移到 begin 的开头
|
200
201
|
else
|
202
|
+
|
201
203
|
# 超过最大错误限制 判断错误类型
|
202
204
|
case error
|
203
|
-
when Net::HTTPFatalError
|
204
|
-
raise error
|
205
205
|
when EOFError
|
206
206
|
Rails.logger.warn "EOFError!"
|
207
|
-
if update_proxy?
|
208
|
-
proxy(get_proxy)
|
209
|
-
http_error_sleep
|
210
|
-
retry # 这将把控制移到 begin 的开头
|
211
|
-
else
|
212
|
-
raise error
|
213
|
-
end
|
214
207
|
when Timeout::Error
|
215
208
|
Rails.logger.warn "请求超时!"
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
209
|
+
when Net::HTTPServerException
|
210
|
+
Rails.logger.warn "代理失效:[#{proxy_address}:#{proxy_port}]"
|
211
|
+
when Errno::ECONNREFUSED
|
212
|
+
Rails.logger.warn "Errno::ECONNREFUSED"
|
213
|
+
else
|
214
|
+
raise error
|
215
|
+
end
|
216
|
+
|
217
|
+
if update_proxy?
|
218
|
+
@error_num = 0
|
219
|
+
retry # 这将把控制移到 begin 的开头
|
223
220
|
else
|
224
221
|
raise error
|
225
222
|
end
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2.
|
4
|
+
version: 0.2.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|