http_crawler 0.3.1.27 → 0.3.1.28

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d3236b0e130a26ee4e07238719f7bcbe70e72ae78729bc3d5dc5149c072a1b03
4
- data.tar.gz: adf5773258b3863038858e6086ecb39fce1f2d6fb09f292ea619e1552ab86011
3
+ metadata.gz: 809654bd670ca7a1586478e45467c162d12ab5656d5ac32f0072beea5de967ee
4
+ data.tar.gz: 93c0efeeb3737db3184d3ca93d0bd9ced39ad13e52f8b73ae30b874786504d9b
5
5
  SHA512:
6
- metadata.gz: 5170bfaba6df0ce2e14372f3e3e83e773d112dfd9c29060f9ca3b72b40a3dada05f040d24e8408aa4ff27575c347927fc47e7e2fc6598ecbb0e8ebf65d70c13b
7
- data.tar.gz: 85161b1c474689cef0fcb57118caf701c642f4896381be1bbf1f3640889f7f950c7b53124cc7229225834a8a372daec6ec30ec55d0d7366d6599d2d9df7e0162
6
+ metadata.gz: c9752c6d3ab7203fdb6058340dd049288217fb91a3cfa459055d369d522218a37d228a7ac47ee33f904f660dab6faeec7cba25f4a30123bd2327d5c30a8293d3
7
+ data.tar.gz: 8bf4fa8399ffe7fae9d7e6f256614ce23c9757772b165e319330078069dc76349cb5dd1dfffe9e44dd6ecf5ebf82572c49e4fc843314ed238cb95f8880f062f6
data/.idea/workspace.xml CHANGED
@@ -95,8 +95,8 @@
95
95
  <file leaf-file-name="client.rb" pinned="false" current-in-tab="true">
96
96
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
97
97
  <provider selected="true" editor-type-id="text-editor">
98
- <state relative-caret-position="201">
99
- <caret line="208" selection-start-line="208" selection-end-line="208" />
98
+ <state relative-caret-position="273">
99
+ <caret line="212" column="11" lean-forward="true" selection-start-line="212" selection-start-column="11" selection-end-line="212" selection-end-column="11" />
100
100
  </state>
101
101
  </provider>
102
102
  </entry>
@@ -165,7 +165,7 @@
165
165
  <component name="NodePackageJsonFileManager">
166
166
  <packageJsonPaths />
167
167
  </component>
168
- <component name="ProjectFrameBounds" fullScreen="true">
168
+ <component name="ProjectFrameBounds" extendedState="6" fullScreen="true">
169
169
  <option name="y" value="23" />
170
170
  <option name="width" value="1280" />
171
171
  <option name="height" value="777" />
@@ -175,6 +175,7 @@
175
175
  <foldersAlwaysOnTop value="true" />
176
176
  </navigator>
177
177
  <panes>
178
+ <pane id="Scope" />
178
179
  <pane id="ProjectPane">
179
180
  <subPane>
180
181
  <expand>
@@ -197,7 +198,6 @@
197
198
  <select />
198
199
  </subPane>
199
200
  </pane>
200
- <pane id="Scope" />
201
201
  </panes>
202
202
  </component>
203
203
  <component name="PropertiesComponent">
@@ -253,29 +253,29 @@
253
253
  <workItem from="1557137463254" duration="382000" />
254
254
  <workItem from="1557156104186" duration="1815000" />
255
255
  <workItem from="1557160216202" duration="138000" />
256
- <workItem from="1563360666497" duration="12000" />
256
+ <workItem from="1563360666497" duration="431000" />
257
+ <workItem from="1563361538580" duration="6000" />
257
258
  </task>
258
259
  <servers />
259
260
  </component>
260
261
  <component name="TimeTrackingManager">
261
- <option name="totallyTimeSpent" value="33040000" />
262
+ <option name="totallyTimeSpent" value="33465000" />
262
263
  </component>
263
264
  <component name="ToolWindowManager">
264
- <frame x="0" y="0" width="1680" height="1050" extended-state="0" />
265
- <editor active="true" />
265
+ <frame x="0" y="0" width="1680" height="1050" extended-state="6" />
266
266
  <layout>
267
- <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.12576313" />
267
+ <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.0964591" />
268
268
  <window_info anchor="bottom" id="TODO" order="6" />
269
269
  <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
270
270
  <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
271
271
  <window_info anchor="right" id="Database" order="3" />
272
272
  <window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
273
- <window_info anchor="bottom" id="Run" order="2" />
274
273
  <window_info anchor="bottom" id="Version Control" order="7" />
274
+ <window_info anchor="bottom" id="Run" order="2" />
275
275
  <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
276
- <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.16410257" />
277
- <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
276
+ <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.11794872" />
278
277
  <window_info id="Favorites" order="2" side_tool="true" />
278
+ <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
279
279
  <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
280
280
  <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
281
281
  <window_info anchor="right" id="Commander" order="0" weight="0.4" />
@@ -589,8 +589,8 @@
589
589
  </entry>
590
590
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
591
591
  <provider selected="true" editor-type-id="text-editor">
592
- <state relative-caret-position="201">
593
- <caret line="208" selection-start-line="208" selection-end-line="208" />
592
+ <state relative-caret-position="273">
593
+ <caret line="212" column="11" lean-forward="true" selection-start-line="212" selection-start-column="11" selection-end-line="212" selection-end-column="11" />
594
594
  </state>
595
595
  </provider>
596
596
  </entry>
@@ -61,7 +61,7 @@ module HttpCrawler
61
61
  attr_accessor :max_error_num
62
62
  # 最大错误重试次数
63
63
  def max_error_num
64
- @max_error_num ||= 1
64
+ @max_error_num ||= 3
65
65
  end
66
66
 
67
67
  attr_reader :uri
@@ -334,13 +334,7 @@ module HttpCrawler
334
334
  else
335
335
  # 每次错误次数尝试 -1
336
336
  n -= 1
337
- case error
338
- when HTTP::TimeoutError
339
- # 超时错误切换代理
340
- raise error unless self.update_proxy?
341
- else
342
- raise error unless self.update_proxy?
343
- end
337
+ self.update_proxy?
344
338
  retry
345
339
  end
346
340
  end
@@ -8,9 +8,7 @@ module HTTP
8
8
  # 数据解压
9
9
  case self.headers['Content-Encoding']
10
10
  when 'gzip' then
11
- sio = StringIO.new(self.body.to_s)
12
- gz = Zlib::GzipReader.new(sio)
13
- @decoding_body = gz.read()
11
+ @decoding_body = Zlib::GzipReader.new(StringIO.new(self.body.to_s), encoding: "ASCII-8BIT").read
14
12
  when 'br'
15
13
  @decoding_body = Brotli.inflate(self.body.to_s)
16
14
  # when 'deflate'
@@ -35,12 +33,12 @@ module HTTP
35
33
 
36
34
  # 进行转码
37
35
  begin
38
- @decoding_body.force_encoding(encoding).encode!('utf-8') if encoding && encoding != @decoding_body.encoding
36
+ @decoding_body.force_encoding(encoding).encode!('utf-8',invalid: :replace) if encoding && encoding != @decoding_body.encoding
39
37
  rescue => e
40
38
  # 转码错误后再次使用 CharDet 判断编码格式后进行转码
41
39
  cd = CharDet.detect(@decoding_body)["encoding"]
42
40
  if (cd && cd != encoding)
43
- @decoding_body.force_encoding(cd).encode!('utf-8') if encoding != @decoding_body.encoding
41
+ @decoding_body.force_encoding(cd).encode!('utf-8',invalid: :replace) if encoding != @decoding_body.encoding
44
42
  else
45
43
  # 还是转码错误则抛出源码转字符串内容
46
44
  self.body.to_s
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.27"
2
+ VERSION = "0.3.1.28"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.27
4
+ version: 0.3.1.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2019-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec