http_crawler 0.2.2.7 → 0.2.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 93396be4c3f0ece060f0869683020b02f9b9f73e
4
- data.tar.gz: c7725107df258294397070dd7c6fcf509c653a59
3
+ metadata.gz: 4126016201a30cb18b5cd7fe089d64fd14b9757f
4
+ data.tar.gz: 57f61d877b5ecf879293d8426026dc46629c9031
5
5
  SHA512:
6
- metadata.gz: ada6ea728dab4a017d85debc1fae638afbd88a957d49f5de44863aafa5002c98744921dcbc8b4593b6037b68f50c1b6b1678a2969a2b3a09accbe2e98e1d92a4
7
- data.tar.gz: 62edc56e250942d8bfcf8fbd18866c3913b8b3a15c8478729652816bdb345a232afd3f9615a125432daf4530e31f926dd96c6fd81342277a3bc547434d26a789
6
+ metadata.gz: 0bc0225909563a93b8c7d655099ef27895deb469479c21c6827ce23448454915fdaecc5073473a363e9dae30c1aedeeb8bf6c46e6b6b17b40b582f2ec6fe2326
7
+ data.tar.gz: 74f6022b992c49b4020d50d2dd824176d4c914e46bdb788439f3a1152c4c28d642b8a795926a95ad6e31b421d0715f9f0f69e12629831447ba9ed67809de7656
data/.idea/workspace.xml CHANGED
@@ -28,8 +28,8 @@
28
28
  <file leaf-file-name="version.rb" pinned="false" current-in-tab="true">
29
29
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
30
30
  <provider selected="true" editor-type-id="text-editor">
31
- <state>
32
- <caret column="18" lean-forward="true" selection-start-column="18" selection-end-column="18" />
31
+ <state relative-caret-position="45">
32
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
33
33
  </state>
34
34
  </provider>
35
35
  </entry>
@@ -46,35 +46,35 @@
46
46
  <file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
47
47
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
48
48
  <provider selected="true" editor-type-id="text-editor">
49
- <state relative-caret-position="40">
50
- <caret line="202" column="36" selection-start-line="202" selection-start-column="36" selection-end-line="202" selection-end-column="36" />
49
+ <state relative-caret-position="267">
50
+ <caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
51
51
  </state>
52
52
  </provider>
53
53
  </entry>
54
54
  </file>
55
- <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
56
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
55
+ <file leaf-file-name="compat.rb" pinned="false" current-in-tab="false">
56
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
57
57
  <provider selected="true" editor-type-id="text-editor">
58
- <state relative-caret-position="465">
59
- <caret line="31" column="54" selection-start-line="31" selection-start-column="54" selection-end-line="31" selection-end-column="54" />
58
+ <state relative-caret-position="270">
59
+ <caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
60
60
  </state>
61
61
  </provider>
62
62
  </entry>
63
63
  </file>
64
- <file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
65
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
64
+ <file leaf-file-name="errno.rb" pinned="false" current-in-tab="false">
65
+ <entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
66
66
  <provider selected="true" editor-type-id="text-editor">
67
- <state relative-caret-position="30">
68
- <caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
67
+ <state relative-caret-position="480">
68
+ <caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
69
69
  </state>
70
70
  </provider>
71
71
  </entry>
72
72
  </file>
73
- <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
74
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
73
+ <file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
74
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
75
75
  <provider selected="true" editor-type-id="text-editor">
76
- <state relative-caret-position="90">
77
- <caret line="6" column="36" selection-start-line="6" selection-start-column="36" selection-end-line="6" selection-end-column="36" />
76
+ <state relative-caret-position="30">
77
+ <caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
78
78
  </state>
79
79
  </provider>
80
80
  </entry>
@@ -88,15 +88,6 @@
88
88
  </provider>
89
89
  </entry>
90
90
  </file>
91
- <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
92
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
93
- <provider selected="true" editor-type-id="text-editor">
94
- <state relative-caret-position="150">
95
- <caret line="10" column="48" selection-start-line="10" selection-start-column="48" selection-end-line="10" selection-end-column="48" />
96
- </state>
97
- </provider>
98
- </entry>
99
- </file>
100
91
  <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
101
92
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
102
93
  <provider selected="true" editor-type-id="text-editor">
@@ -203,6 +194,21 @@
203
194
  <item name="lib" type="462c0819:PsiDirectoryNode" />
204
195
  <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
205
196
  </path>
197
+ <path>
198
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
199
+ <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
200
+ </path>
201
+ <path>
202
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
203
+ <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
204
+ <item name="&lt; RVM: ruby-2.4.1 &gt;" type="70bed36:NamedLibraryElementNode" />
205
+ </path>
206
+ <path>
207
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
208
+ <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
209
+ <item name="&lt; RVM: ruby-2.4.1 &gt;" type="70bed36:NamedLibraryElementNode" />
210
+ <item name="rubystubs24" type="462c0819:PsiDirectoryNode" />
211
+ </path>
206
212
  </expand>
207
213
  <select />
208
214
  </subPane>
@@ -251,18 +257,19 @@
251
257
  <workItem from="1545966041001" duration="9181000" />
252
258
  <workItem from="1546164127129" duration="10301000" />
253
259
  <workItem from="1546240992243" duration="719000" />
254
- <workItem from="1546291493927" duration="26000" />
260
+ <workItem from="1546291493927" duration="464000" />
261
+ <workItem from="1546436457874" duration="826000" />
255
262
  </task>
256
263
  <servers />
257
264
  </component>
258
265
  <component name="TimeTrackingManager">
259
- <option name="totallyTimeSpent" value="20227000" />
266
+ <option name="totallyTimeSpent" value="21491000" />
260
267
  </component>
261
268
  <component name="ToolWindowManager">
262
269
  <frame x="0" y="0" width="1680" height="1050" extended-state="6" />
263
270
  <editor active="true" />
264
271
  <layout>
265
- <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25213677" />
272
+ <window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.25518927" />
266
273
  <window_info anchor="bottom" id="TODO" order="6" />
267
274
  <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
268
275
  <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
@@ -270,7 +277,7 @@
270
277
  <window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
271
278
  <window_info anchor="bottom" id="Version Control" order="7" />
272
279
  <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
273
- <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
280
+ <window_info active="true" anchor="bottom" id="Terminal" order="7" visible="true" weight="0.34393638" />
274
281
  <window_info id="Favorites" order="2" side_tool="true" />
275
282
  <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
276
283
  <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
@@ -290,27 +297,6 @@
290
297
  <option name="myLimit" value="2678400000" />
291
298
  </component>
292
299
  <component name="editorHistoryManager">
293
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
294
- <provider selected="true" editor-type-id="text-editor">
295
- <state relative-caret-position="3495">
296
- <caret line="233" lean-forward="true" selection-start-line="233" selection-end-line="233" />
297
- </state>
298
- </provider>
299
- </entry>
300
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
301
- <provider selected="true" editor-type-id="text-editor">
302
- <state relative-caret-position="465">
303
- <caret line="31" column="54" selection-start-line="31" selection-start-column="54" selection-end-line="31" selection-end-column="54" />
304
- </state>
305
- </provider>
306
- </entry>
307
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
308
- <provider selected="true" editor-type-id="text-editor">
309
- <state relative-caret-position="30">
310
- <caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
311
- </state>
312
- </provider>
313
- </entry>
314
300
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
315
301
  <provider selected="true" editor-type-id="text-editor">
316
302
  <state relative-caret-position="90">
@@ -613,17 +599,38 @@
613
599
  </state>
614
600
  </provider>
615
601
  </entry>
602
+ <entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/errno.rb">
603
+ <provider selected="true" editor-type-id="text-editor">
604
+ <state relative-caret-position="480">
605
+ <caret line="32" column="7" selection-start-line="32" selection-start-column="7" selection-end-line="32" selection-end-column="7" />
606
+ </state>
607
+ </provider>
608
+ </entry>
616
609
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
617
610
  <provider selected="true" editor-type-id="text-editor">
618
- <state relative-caret-position="40">
619
- <caret line="202" column="36" selection-start-line="202" selection-start-column="36" selection-end-line="202" selection-end-column="36" />
611
+ <state relative-caret-position="267">
612
+ <caret line="210" column="21" selection-start-line="210" selection-start-column="21" selection-end-line="210" selection-end-column="21" />
613
+ </state>
614
+ </provider>
615
+ </entry>
616
+ <entry file="file://$APPLICATION_HOME_DIR$/rubystubs24/system_call_error.rb">
617
+ <provider selected="true" editor-type-id="text-editor">
618
+ <state relative-caret-position="150">
619
+ <caret line="13" column="2" lean-forward="true" selection-start-line="13" selection-start-column="2" selection-end-line="13" selection-end-column="2" />
620
+ </state>
621
+ </provider>
622
+ </entry>
623
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/webrick/compat.rb">
624
+ <provider selected="true" editor-type-id="text-editor">
625
+ <state relative-caret-position="270">
626
+ <caret line="18" column="1" lean-forward="true" selection-start-line="18" selection-start-column="1" selection-end-line="18" selection-end-column="1" />
620
627
  </state>
621
628
  </provider>
622
629
  </entry>
623
630
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
624
631
  <provider selected="true" editor-type-id="text-editor">
625
- <state>
626
- <caret column="18" lean-forward="true" selection-start-column="18" selection-end-column="18" />
632
+ <state relative-caret-position="45">
633
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
627
634
  </state>
628
635
  </provider>
629
636
  </entry>
@@ -161,7 +161,7 @@ module HttpCrawler
161
161
  # 重新请求
162
162
  post_fetch(uri_or_path, initheader, dest, &block)
163
163
  when Net::HTTPProxyAuthenticationRequired then
164
- Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{http.proxy_address}:#{http.proxy_port}] =>#{address}"
164
+ Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{proxy_address}:#{proxy_port}] =>#{address}"
165
165
  if update_proxy?
166
166
  server_error_sleep
167
167
  # 重新请求
@@ -186,40 +186,37 @@ module HttpCrawler
186
186
  Rails.logger.debug("body => #{body}") if started? && body
187
187
  super(req, body, &block)
188
188
  rescue => error
189
+ Rails.logger.error "出错了! 错误类型 => #{error.class}"
189
190
  if started?
190
191
  # started? 是为了判断是否结束http请求,如果不添加则会处理2次异常
191
192
  Rails.logger.error("#{req.class} => #{use_ssl? ? "https://" : "http://" }#{address}:#{port}#{req.path}")
192
193
  Rails.logger.error("body => #{body}") if body
193
194
  raise error
194
195
  else
196
+ http_error_sleep
195
197
  # 最大错误尝试次数
196
198
  if @error_num < @max_error_num
197
199
  @error_num += 1
198
- http_error_sleep
199
200
  retry # 这将把控制移到 begin 的开头
200
201
  else
202
+
201
203
  # 超过最大错误限制 判断错误类型
202
204
  case error
203
- when Net::HTTPFatalError
204
- raise error
205
205
  when EOFError
206
206
  Rails.logger.warn "EOFError!"
207
- if update_proxy?
208
- proxy(get_proxy)
209
- http_error_sleep
210
- retry # 这将把控制移到 begin 的开头
211
- else
212
- raise error
213
- end
214
207
  when Timeout::Error
215
208
  Rails.logger.warn "请求超时!"
216
- if update_proxy?
217
- @error_num = 0
218
- http_error_sleep
219
- retry # 这将把控制移到 begin 的开头
220
- else
221
- raise error
222
- end
209
+ when Net::HTTPServerException
210
+ Rails.logger.warn "代理失效:[#{proxy_address}:#{proxy_port}]"
211
+ when Errno::ECONNREFUSED
212
+ Rails.logger.warn "Errno::ECONNREFUSED"
213
+ else
214
+ raise error
215
+ end
216
+
217
+ if update_proxy?
218
+ @error_num = 0
219
+ retry # 这将把控制移到 begin 的开头
223
220
  else
224
221
  raise error
225
222
  end
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.2.2.7"
2
+ VERSION = "0.2.2.8"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2.7
4
+ version: 0.2.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-31 00:00:00.000000000 Z
11
+ date: 2019-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec