http_crawler 0.2.2.7 → 0.2.2.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 93396be4c3f0ece060f0869683020b02f9b9f73e
4
- data.tar.gz: c7725107df258294397070dd7c6fcf509c653a59
3
+ metadata.gz: 4126016201a30cb18b5cd7fe089d64fd14b9757f
4
+ data.tar.gz: 57f61d877b5ecf879293d8426026dc46629c9031
5
5
  SHA512:
6
- metadata.gz: ada6ea728dab4a017d85debc1fae638afbd88a957d49f5de44863aafa5002c98744921dcbc8b4593b6037b68f50c1b6b1678a2969a2b3a09accbe2e98e1d92a4
7
- data.tar.gz: 62edc56e250942d8bfcf8fbd18866c3913b8b3a15c8478729652816bdb345a232afd3f9615a125432daf4530e31f926dd96c6fd81342277a3bc547434d26a789
6
+ metadata.gz: 0bc0225909563a93b8c7d655099ef27895deb469479c21c6827ce23448454915fdaecc5073473a363e9dae30c1aedeeb8bf6c46e6b6b17b40b582f2ec6fe2326
7
+ data.tar.gz: 74f6022b992c49b4020d50d2dd824176d4c914e46bdb788439f3a1152c4c28d642b8a795926a95ad6e31b421d0715f9f0f69e12629831447ba9ed67809de7656
data/.idea/workspace.xml CHANGED
@@ -28,8 +28,8 @@
28
28
  <file leaf-file-name="version.rb" pinned="false" current-in-tab="true">
29
29
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
30
30
  <provider selected="true" editor-type-id="text-editor">
31
- <state>
32
- <caret column="18" lean-forward="true" selection-start-column="18" selection-end-column="18" />
31
+ <state relative-caret-position="45">
32
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
33
33
  </state>
34
34
  </provider>
35
35
  </entry>
@@ -46,35 +46,35 @@
46
46
  <file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
47
47
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
48
48
  <provider selected="true" editor-type-id="text-editor">
49
- <state relative-caret-position="40">
50
- <caret line="202" column="36" selection-start-line="202" selection-start-column="36" selection-end-line="202" selection-end-column="36" />
49
+ <state relative-caret-position="267">
50
+ <caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
51
51
  </state>
52
52
  </provider>
53
53
  </entry>
54
54
  </file>
55
- <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
56
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
55
+ <file leaf-file-name="compat.rb" pinned="false" current-in-tab="false">
56
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
57
57
  <provider selected="true" editor-type-id="text-editor">
58
- <state relative-caret-position="465">
59
- <caret line="31" column="54" selection-start-line="31" selection-start-column="54" selection-end-line="31" selection-end-column="54" />
58
+ <state relative-caret-position="270">
59
+ <caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
60
60
  </state>
61
61
  </provider>
62
62
  </entry>
63
63
  </file>
64
- <file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
65
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
64
+ <file leaf-file-name="errno.rb" pinned="false" current-in-tab="false">
65
+ <entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
66
66
  <provider selected="true" editor-type-id="text-editor">
67
- <state relative-caret-position="30">
68
- <caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
67
+ <state relative-caret-position="480">
68
+ <caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
69
69
  </state>
70
70
  </provider>
71
71
  </entry>
72
72
  </file>
73
- <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
74
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
73
+ <file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
74
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
75
75
  <provider selected="true" editor-type-id="text-editor">
76
- <state relative-caret-position="90">
77
- <caret line="6" column="36" selection-start-line="6" selection-start-column="36" selection-end-line="6" selection-end-column="36" />
76
+ <state relative-caret-position="30">
77
+ <caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
78
78
  </state>
79
79
  </provider>
80
80
  </entry>
@@ -88,15 +88,6 @@
88
88
  </provider>
89
89
  </entry>
90
90
  </file>
91
- <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
92
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
93
- <provider selected="true" editor-type-id="text-editor">
94
- <state relative-caret-position="150">
95
- <caret line="10" column="48" selection-start-line="10" selection-start-column="48" selection-end-line="10" selection-end-column="48" />
96
- </state>
97
- </provider>
98
- </entry>
99
- </file>
100
91
  <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
101
92
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
102
93
  <provider selected="true" editor-type-id="text-editor">
@@ -203,6 +194,21 @@
203
194
  <item name="lib" type="462c0819:PsiDirectoryNode" />
204
195
  <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
205
196
  </path>
197
+ <path>
198
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
199
+ <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
200
+ </path>
201
+ <path>
202
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
203
+ <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
204
+ <item name="&lt; RVM: ruby-2.4.1 &gt;" type="70bed36:NamedLibraryElementNode" />
205
+ </path>
206
+ <path>
207
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
208
+ <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
209
+ <item name="&lt; RVM: ruby-2.4.1 &gt;" type="70bed36:NamedLibraryElementNode" />
210
+ <item name="rubystubs24" type="462c0819:PsiDirectoryNode" />
211
+ </path>
206
212
  </expand>
207
213
  <select />
208
214
  </subPane>
@@ -251,18 +257,19 @@
251
257
  <workItem from="1545966041001" duration="9181000" />
252
258
  <workItem from="1546164127129" duration="10301000" />
253
259
  <workItem from="1546240992243" duration="719000" />
254
- <workItem from="1546291493927" duration="26000" />
260
+ <workItem from="1546291493927" duration="464000" />
261
+ <workItem from="1546436457874" duration="826000" />
255
262
  </task>
256
263
  <servers />
257
264
  </component>
258
265
  <component name="TimeTrackingManager">
259
- <option name="totallyTimeSpent" value="20227000" />
266
+ <option name="totallyTimeSpent" value="21491000" />
260
267
  </component>
261
268
  <component name="ToolWindowManager">
262
269
  <frame x="0" y="0" width="1680" height="1050" extended-state="6" />
263
270
  <editor active="true" />
264
271
  <layout>
265
- <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25213677" />
272
+ <window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.25518927" />
266
273
  <window_info anchor="bottom" id="TODO" order="6" />
267
274
  <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
268
275
  <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
@@ -270,7 +277,7 @@
270
277
  <window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
271
278
  <window_info anchor="bottom" id="Version Control" order="7" />
272
279
  <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
273
- <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
280
+ <window_info active="true" anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
274
281
  <window_info id="Favorites" order="2" side_tool="true" />
275
282
  <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
276
283
  <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
@@ -290,27 +297,6 @@
290
297
  <option name="myLimit" value="2678400000" />
291
298
  </component>
292
299
  <component name="editorHistoryManager">
293
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
294
- <provider selected="true" editor-type-id="text-editor">
295
- <state relative-caret-position="3495">
296
- <caret line="233" lean-forward="true" selection-start-line="233" selection-end-line="233" />
297
- </state>
298
- </provider>
299
- </entry>
300
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
301
- <provider selected="true" editor-type-id="text-editor">
302
- <state relative-caret-position="465">
303
- <caret line="31" column="54" selection-start-line="31" selection-start-column="54" selection-end-line="31" selection-end-column="54" />
304
- </state>
305
- </provider>
306
- </entry>
307
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
308
- <provider selected="true" editor-type-id="text-editor">
309
- <state relative-caret-position="30">
310
- <caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
311
- </state>
312
- </provider>
313
- </entry>
314
300
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
315
301
  <provider selected="true" editor-type-id="text-editor">
316
302
  <state relative-caret-position="90">
@@ -613,17 +599,38 @@
613
599
  </state>
614
600
  </provider>
615
601
  </entry>
602
+ <entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
603
+ <provider selected="true" editor-type-id="text-editor">
604
+ <state relative-caret-position="480">
605
+ <caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
606
+ </state>
607
+ </provider>
608
+ </entry>
616
609
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
617
610
  <provider selected="true" editor-type-id="text-editor">
618
- <state relative-caret-position="40">
619
- <caret line="202" column="36" selection-start-line="202" selection-start-column="36" selection-end-line="202" selection-end-column="36" />
611
+ <state relative-caret-position="267">
612
+ <caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
613
+ </state>
614
+ </provider>
615
+ </entry>
616
+ <entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/system_call_error.rb">
617
+ <provider selected="true" editor-type-id="text-editor">
618
+ <state relative-caret-position="150">
619
+ <caret line="13" column="2" lean-forward="true" selection-start-line="13" selection-start-column="2" selection-end-line="13" selection-end-column="2" />
620
+ </state>
621
+ </provider>
622
+ </entry>
623
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
624
+ <provider selected="true" editor-type-id="text-editor">
625
+ <state relative-caret-position="270">
626
+ <caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
620
627
  </state>
621
628
  </provider>
622
629
  </entry>
623
630
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
624
631
  <provider selected="true" editor-type-id="text-editor">
625
- <state>
626
- <caret column="18" lean-forward="true" selection-start-column="18" selection-end-column="18" />
632
+ <state relative-caret-position="45">
633
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
627
634
  </state>
628
635
  </provider>
629
636
  </entry>
@@ -161,7 +161,7 @@ module HttpCrawler
161
161
  # 重新请求
162
162
  post_fetch(uri_or_path, initheader, dest, &block)
163
163
  when Net::HTTPProxyAuthenticationRequired then
164
- Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{http.proxy_address}:#{http.proxy_port}] =>#{address}"
164
+ Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{proxy_address}:#{proxy_port}] =>#{address}"
165
165
  if update_proxy?
166
166
  server_error_sleep
167
167
  # 重新请求
@@ -186,40 +186,37 @@ module HttpCrawler
186
186
  Rails.logger.debug("body => #{body}") if started? && body
187
187
  super(req, body, &block)
188
188
  rescue => error
189
+ Rails.logger.error "出错了! 错误类型 => #{error.class}"
189
190
  if started?
190
191
  # started? 是为了判断是否结束http请求,如果不添加则会处理2次异常
191
192
  Rails.logger.error("#{req.class} => #{use_ssl? ? "https://" : "http://" }#{address}:#{port}#{req.path}")
192
193
  Rails.logger.error("body => #{body}") if body
193
194
  raise error
194
195
  else
196
+ http_error_sleep
195
197
  # 最大错误尝试次数
196
198
  if @error_num < @max_error_num
197
199
  @error_num += 1
198
- http_error_sleep
199
200
  retry # 这将把控制移到 begin 的开头
200
201
  else
202
+
201
203
  # 超过最大错误限制 判断错误类型
202
204
  case error
203
- when Net::HTTPFatalError
204
- raise error
205
205
  when EOFError
206
206
  Rails.logger.warn "EOFError!"
207
- if update_proxy?
208
- proxy(get_proxy)
209
- http_error_sleep
210
- retry # 这将把控制移到 begin 的开头
211
- else
212
- raise error
213
- end
214
207
  when Timeout::Error
215
208
  Rails.logger.warn "请求超时!"
216
- if update_proxy?
217
- @error_num = 0
218
- http_error_sleep
219
- retry # 这将把控制移到 begin 的开头
220
- else
221
- raise error
222
- end
209
+ when Net::HTTPServerException
210
+ Rails.logger.warn "代理失效:[#{proxy_address}:#{proxy_port}]"
211
+ when Errno::ECONNREFUSED
212
+ Rails.logger.warn "Errno::ECONNREFUSED"
213
+ else
214
+ raise error
215
+ end
216
+
217
+ if update_proxy?
218
+ @error_num = 0
219
+ retry # 这将把控制移到 begin 的开头
223
220
  else
224
221
  raise error
225
222
  end
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.2.2.7"
2
+ VERSION = "0.2.2.8"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2.7
4
+ version: 0.2.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-31 00:00:00.000000000 Z
11
+ date: 2019-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec