http_crawler 0.2.2.7 → 0.2.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.idea/workspace.xml +61 -54
- data/lib/http_crawler/http.rb +15 -18
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4126016201a30cb18b5cd7fe089d64fd14b9757f
|
|
4
|
+
data.tar.gz: 57f61d877b5ecf879293d8426026dc46629c9031
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0bc0225909563a93b8c7d655099ef27895deb469479c21c6827ce23448454915fdaecc5073473a363e9dae30c1aedeeb8bf6c46e6b6b17b40b582f2ec6fe2326
|
|
7
|
+
data.tar.gz: 74f6022b992c49b4020d50d2dd824176d4c914e46bdb788439f3a1152c4c28d642b8a795926a95ad6e31b421d0715f9f0f69e12629831447ba9ed67809de7656
|
data/.idea/workspace.xml
CHANGED
|
@@ -28,8 +28,8 @@
|
|
|
28
28
|
<file leaf-file-name="version.rb" pinned="false" current-in-tab="true">
|
|
29
29
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
|
30
30
|
<provider selected="true" editor-type-id="text-editor">
|
|
31
|
-
<state>
|
|
32
|
-
<caret
|
|
31
|
+
<state relative-caret-position="45">
|
|
32
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
|
33
33
|
</state>
|
|
34
34
|
</provider>
|
|
35
35
|
</entry>
|
|
@@ -46,35 +46,35 @@
|
|
|
46
46
|
<file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
|
|
47
47
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
|
48
48
|
<provider selected="true" editor-type-id="text-editor">
|
|
49
|
-
<state relative-caret-position="
|
|
50
|
-
<caret line="
|
|
49
|
+
<state relative-caret-position="267">
|
|
50
|
+
<caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
|
|
51
51
|
</state>
|
|
52
52
|
</provider>
|
|
53
53
|
</entry>
|
|
54
54
|
</file>
|
|
55
|
-
<file leaf-file-name="
|
|
56
|
-
<entry file="file://$
|
|
55
|
+
<file leaf-file-name="compat.rb" pinned="false" current-in-tab="false">
|
|
56
|
+
<entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
|
|
57
57
|
<provider selected="true" editor-type-id="text-editor">
|
|
58
|
-
<state relative-caret-position="
|
|
59
|
-
<caret line="
|
|
58
|
+
<state relative-caret-position="270">
|
|
59
|
+
<caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
|
|
60
60
|
</state>
|
|
61
61
|
</provider>
|
|
62
62
|
</entry>
|
|
63
63
|
</file>
|
|
64
|
-
<file leaf-file-name="
|
|
65
|
-
<entry file="file://$
|
|
64
|
+
<file leaf-file-name="errno.rb" pinned="false" current-in-tab="false">
|
|
65
|
+
<entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
|
|
66
66
|
<provider selected="true" editor-type-id="text-editor">
|
|
67
|
-
<state relative-caret-position="
|
|
68
|
-
<caret line="
|
|
67
|
+
<state relative-caret-position="480">
|
|
68
|
+
<caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
|
|
69
69
|
</state>
|
|
70
70
|
</provider>
|
|
71
71
|
</entry>
|
|
72
72
|
</file>
|
|
73
|
-
<file leaf-file-name="
|
|
74
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
73
|
+
<file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
|
|
74
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
|
75
75
|
<provider selected="true" editor-type-id="text-editor">
|
|
76
|
-
<state relative-caret-position="
|
|
77
|
-
<caret line="
|
|
76
|
+
<state relative-caret-position="30">
|
|
77
|
+
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
|
78
78
|
</state>
|
|
79
79
|
</provider>
|
|
80
80
|
</entry>
|
|
@@ -88,15 +88,6 @@
|
|
|
88
88
|
</provider>
|
|
89
89
|
</entry>
|
|
90
90
|
</file>
|
|
91
|
-
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
|
92
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
|
93
|
-
<provider selected="true" editor-type-id="text-editor">
|
|
94
|
-
<state relative-caret-position="150">
|
|
95
|
-
<caret line="10" column="48" selection-start-line="10" selection-start-column="48" selection-end-line="10" selection-end-column="48" />
|
|
96
|
-
</state>
|
|
97
|
-
</provider>
|
|
98
|
-
</entry>
|
|
99
|
-
</file>
|
|
100
91
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
|
101
92
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
|
|
102
93
|
<provider selected="true" editor-type-id="text-editor">
|
|
@@ -203,6 +194,21 @@
|
|
|
203
194
|
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
|
204
195
|
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
205
196
|
</path>
|
|
197
|
+
<path>
|
|
198
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
|
199
|
+
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
|
|
200
|
+
</path>
|
|
201
|
+
<path>
|
|
202
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
|
203
|
+
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
|
|
204
|
+
<item name="< RVM: ruby-2.4.1 >" type="70bed36:NamedLibraryElementNode" />
|
|
205
|
+
</path>
|
|
206
|
+
<path>
|
|
207
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
|
208
|
+
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
|
|
209
|
+
<item name="< RVM: ruby-2.4.1 >" type="70bed36:NamedLibraryElementNode" />
|
|
210
|
+
<item name="rubystubs24" type="462c0819:PsiDirectoryNode" />
|
|
211
|
+
</path>
|
|
206
212
|
</expand>
|
|
207
213
|
<select />
|
|
208
214
|
</subPane>
|
|
@@ -251,18 +257,19 @@
|
|
|
251
257
|
<workItem from="1545966041001" duration="9181000" />
|
|
252
258
|
<workItem from="1546164127129" duration="10301000" />
|
|
253
259
|
<workItem from="1546240992243" duration="719000" />
|
|
254
|
-
<workItem from="1546291493927" duration="
|
|
260
|
+
<workItem from="1546291493927" duration="464000" />
|
|
261
|
+
<workItem from="1546436457874" duration="826000" />
|
|
255
262
|
</task>
|
|
256
263
|
<servers />
|
|
257
264
|
</component>
|
|
258
265
|
<component name="TimeTrackingManager">
|
|
259
|
-
<option name="totallyTimeSpent" value="
|
|
266
|
+
<option name="totallyTimeSpent" value="21491000" />
|
|
260
267
|
</component>
|
|
261
268
|
<component name="ToolWindowManager">
|
|
262
269
|
<frame x="0" y="0" width="1680" height="1050" extended-state="6" />
|
|
263
270
|
<editor active="true" />
|
|
264
271
|
<layout>
|
|
265
|
-
<window_info
|
|
272
|
+
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.25518927" />
|
|
266
273
|
<window_info anchor="bottom" id="TODO" order="6" />
|
|
267
274
|
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
|
|
268
275
|
<window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
|
|
@@ -270,7 +277,7 @@
|
|
|
270
277
|
<window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
|
|
271
278
|
<window_info anchor="bottom" id="Version Control" order="7" />
|
|
272
279
|
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
|
273
|
-
<window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
|
|
280
|
+
<window_info active="true" anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
|
|
274
281
|
<window_info id="Favorites" order="2" side_tool="true" />
|
|
275
282
|
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
|
276
283
|
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
|
@@ -290,27 +297,6 @@
|
|
|
290
297
|
<option name="myLimit" value="2678400000" />
|
|
291
298
|
</component>
|
|
292
299
|
<component name="editorHistoryManager">
|
|
293
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
|
294
|
-
<provider selected="true" editor-type-id="text-editor">
|
|
295
|
-
<state relative-caret-position="3495">
|
|
296
|
-
<caret line="233" lean-forward="true" selection-start-line="233" selection-end-line="233" />
|
|
297
|
-
</state>
|
|
298
|
-
</provider>
|
|
299
|
-
</entry>
|
|
300
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
|
|
301
|
-
<provider selected="true" editor-type-id="text-editor">
|
|
302
|
-
<state relative-caret-position="465">
|
|
303
|
-
<caret line="31" column="54" selection-start-line="31" selection-start-column="54" selection-end-line="31" selection-end-column="54" />
|
|
304
|
-
</state>
|
|
305
|
-
</provider>
|
|
306
|
-
</entry>
|
|
307
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
|
308
|
-
<provider selected="true" editor-type-id="text-editor">
|
|
309
|
-
<state relative-caret-position="30">
|
|
310
|
-
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
|
311
|
-
</state>
|
|
312
|
-
</provider>
|
|
313
|
-
</entry>
|
|
314
300
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
|
315
301
|
<provider selected="true" editor-type-id="text-editor">
|
|
316
302
|
<state relative-caret-position="90">
|
|
@@ -613,17 +599,38 @@
|
|
|
613
599
|
</state>
|
|
614
600
|
</provider>
|
|
615
601
|
</entry>
|
|
602
|
+
<entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
|
|
603
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
604
|
+
<state relative-caret-position="480">
|
|
605
|
+
<caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
|
|
606
|
+
</state>
|
|
607
|
+
</provider>
|
|
608
|
+
</entry>
|
|
616
609
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
|
617
610
|
<provider selected="true" editor-type-id="text-editor">
|
|
618
|
-
<state relative-caret-position="
|
|
619
|
-
<caret line="
|
|
611
|
+
<state relative-caret-position="267">
|
|
612
|
+
<caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
|
|
613
|
+
</state>
|
|
614
|
+
</provider>
|
|
615
|
+
</entry>
|
|
616
|
+
<entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/system_call_error.rb">
|
|
617
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
618
|
+
<state relative-caret-position="150">
|
|
619
|
+
<caret line="13" column="2" lean-forward="true" selection-start-line="13" selection-start-column="2" selection-end-line="13" selection-end-column="2" />
|
|
620
|
+
</state>
|
|
621
|
+
</provider>
|
|
622
|
+
</entry>
|
|
623
|
+
<entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
|
|
624
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
625
|
+
<state relative-caret-position="270">
|
|
626
|
+
<caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
|
|
620
627
|
</state>
|
|
621
628
|
</provider>
|
|
622
629
|
</entry>
|
|
623
630
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
|
624
631
|
<provider selected="true" editor-type-id="text-editor">
|
|
625
|
-
<state>
|
|
626
|
-
<caret
|
|
632
|
+
<state relative-caret-position="45">
|
|
633
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
|
627
634
|
</state>
|
|
628
635
|
</provider>
|
|
629
636
|
</entry>
|
data/lib/http_crawler/http.rb
CHANGED
|
@@ -161,7 +161,7 @@ module HttpCrawler
|
|
|
161
161
|
# 重新请求
|
|
162
162
|
post_fetch(uri_or_path, initheader, dest, &block)
|
|
163
163
|
when Net::HTTPProxyAuthenticationRequired then
|
|
164
|
-
Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{
|
|
164
|
+
Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{proxy_address}:#{proxy_port}] =>#{address}"
|
|
165
165
|
if update_proxy?
|
|
166
166
|
server_error_sleep
|
|
167
167
|
# 重新请求
|
|
@@ -186,40 +186,37 @@ module HttpCrawler
|
|
|
186
186
|
Rails.logger.debug("body => #{body}") if started? && body
|
|
187
187
|
super(req, body, &block)
|
|
188
188
|
rescue => error
|
|
189
|
+
Rails.logger.error "出错了! 错误类型 => #{error.class}"
|
|
189
190
|
if started?
|
|
190
191
|
# started? 是为了判断是否结束http请求,如果不添加则会处理2次异常
|
|
191
192
|
Rails.logger.error("#{req.class} => #{use_ssl? ? "https://" : "http://" }#{address}:#{port}#{req.path}")
|
|
192
193
|
Rails.logger.error("body => #{body}") if body
|
|
193
194
|
raise error
|
|
194
195
|
else
|
|
196
|
+
http_error_sleep
|
|
195
197
|
# 最大错误尝试次数
|
|
196
198
|
if @error_num < @max_error_num
|
|
197
199
|
@error_num += 1
|
|
198
|
-
http_error_sleep
|
|
199
200
|
retry # 这将把控制移到 begin 的开头
|
|
200
201
|
else
|
|
202
|
+
|
|
201
203
|
# 超过最大错误限制 判断错误类型
|
|
202
204
|
case error
|
|
203
|
-
when Net::HTTPFatalError
|
|
204
|
-
raise error
|
|
205
205
|
when EOFError
|
|
206
206
|
Rails.logger.warn "EOFError!"
|
|
207
|
-
if update_proxy?
|
|
208
|
-
proxy(get_proxy)
|
|
209
|
-
http_error_sleep
|
|
210
|
-
retry # 这将把控制移到 begin 的开头
|
|
211
|
-
else
|
|
212
|
-
raise error
|
|
213
|
-
end
|
|
214
207
|
when Timeout::Error
|
|
215
208
|
Rails.logger.warn "请求超时!"
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
209
|
+
when Net::HTTPServerException
|
|
210
|
+
Rails.logger.warn "代理失效:[#{proxy_address}:#{proxy_port}]"
|
|
211
|
+
when Errno::ECONNREFUSED
|
|
212
|
+
Rails.logger.warn "Errno::ECONNREFUSED"
|
|
213
|
+
else
|
|
214
|
+
raise error
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
if update_proxy?
|
|
218
|
+
@error_num = 0
|
|
219
|
+
retry # 这将把控制移到 begin 的开头
|
|
223
220
|
else
|
|
224
221
|
raise error
|
|
225
222
|
end
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: http_crawler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2.
|
|
4
|
+
version: 0.2.2.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- jagger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2019-01-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|