http_crawler 0.3.1.27 → 0.3.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d3236b0e130a26ee4e07238719f7bcbe70e72ae78729bc3d5dc5149c072a1b03
4
- data.tar.gz: adf5773258b3863038858e6086ecb39fce1f2d6fb09f292ea619e1552ab86011
3
+ metadata.gz: 809654bd670ca7a1586478e45467c162d12ab5656d5ac32f0072beea5de967ee
4
+ data.tar.gz: 93c0efeeb3737db3184d3ca93d0bd9ced39ad13e52f8b73ae30b874786504d9b
5
5
  SHA512:
6
- metadata.gz: 5170bfaba6df0ce2e14372f3e3e83e773d112dfd9c29060f9ca3b72b40a3dada05f040d24e8408aa4ff27575c347927fc47e7e2fc6598ecbb0e8ebf65d70c13b
7
- data.tar.gz: 85161b1c474689cef0fcb57118caf701c642f4896381be1bbf1f3640889f7f950c7b53124cc7229225834a8a372daec6ec30ec55d0d7366d6599d2d9df7e0162
6
+ metadata.gz: c9752c6d3ab7203fdb6058340dd049288217fb91a3cfa459055d369d522218a37d228a7ac47ee33f904f660dab6faeec7cba25f4a30123bd2327d5c30a8293d3
7
+ data.tar.gz: 8bf4fa8399ffe7fae9d7e6f256614ce23c9757772b165e319330078069dc76349cb5dd1dfffe9e44dd6ecf5ebf82572c49e4fc843314ed238cb95f8880f062f6
data/.idea/workspace.xml CHANGED
@@ -95,8 +95,8 @@
95
95
  <file leaf-file-name="client.rb" pinned="false" current-in-tab="true">
96
96
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
97
97
  <provider selected="true" editor-type-id="text-editor">
98
- <state relative-caret-position="201">
99
- <caret line="208" selection-start-line="208" selection-end-line="208" />
98
+ <state relative-caret-position="273">
99
+ <caret line="212" column="11" lean-forward="true" selection-start-line="212" selection-start-column="11" selection-end-line="212" selection-end-column="11" />
100
100
  </state>
101
101
  </provider>
102
102
  </entry>
@@ -165,7 +165,7 @@
165
165
  <component name="NodePackageJsonFileManager">
166
166
  <packageJsonPaths />
167
167
  </component>
168
- <component name="ProjectFrameBounds" fullScreen="true">
168
+ <component name="ProjectFrameBounds" extendedState="6" fullScreen="true">
169
169
  <option name="y" value="23" />
170
170
  <option name="width" value="1280" />
171
171
  <option name="height" value="777" />
@@ -175,6 +175,7 @@
175
175
  <foldersAlwaysOnTop value="true" />
176
176
  </navigator>
177
177
  <panes>
178
+ <pane id="Scope" />
178
179
  <pane id="ProjectPane">
179
180
  <subPane>
180
181
  <expand>
@@ -197,7 +198,6 @@
197
198
  <select />
198
199
  </subPane>
199
200
  </pane>
200
- <pane id="Scope" />
201
201
  </panes>
202
202
  </component>
203
203
  <component name="PropertiesComponent">
@@ -253,29 +253,29 @@
253
253
  <workItem from="1557137463254" duration="382000" />
254
254
  <workItem from="1557156104186" duration="1815000" />
255
255
  <workItem from="1557160216202" duration="138000" />
256
- <workItem from="1563360666497" duration="12000" />
256
+ <workItem from="1563360666497" duration="431000" />
257
+ <workItem from="1563361538580" duration="6000" />
257
258
  </task>
258
259
  <servers />
259
260
  </component>
260
261
  <component name="TimeTrackingManager">
261
- <option name="totallyTimeSpent" value="33040000" />
262
+ <option name="totallyTimeSpent" value="33465000" />
262
263
  </component>
263
264
  <component name="ToolWindowManager">
264
- <frame x="0" y="0" width="1680" height="1050" extended-state="0" />
265
- <editor active="true" />
265
+ <frame x="0" y="0" width="1680" height="1050" extended-state="6" />
266
266
  <layout>
267
- <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.12576313" />
267
+ <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.0964591" />
268
268
  <window_info anchor="bottom" id="TODO" order="6" />
269
269
  <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
270
270
  <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
271
271
  <window_info anchor="right" id="Database" order="3" />
272
272
  <window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
273
- <window_info anchor="bottom" id="Run" order="2" />
274
273
  <window_info anchor="bottom" id="Version Control" order="7" />
274
+ <window_info anchor="bottom" id="Run" order="2" />
275
275
  <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
276
- <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.16410257" />
277
- <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
276
+ <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.11794872" />
278
277
  <window_info id="Favorites" order="2" side_tool="true" />
278
+ <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
279
279
  <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
280
280
  <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
281
281
  <window_info anchor="right" id="Commander" order="0" weight="0.4" />
@@ -589,8 +589,8 @@
589
589
  </entry>
590
590
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
591
591
  <provider selected="true" editor-type-id="text-editor">
592
- <state relative-caret-position="201">
593
- <caret line="208" selection-start-line="208" selection-end-line="208" />
592
+ <state relative-caret-position="273">
593
+ <caret line="212" column="11" lean-forward="true" selection-start-line="212" selection-start-column="11" selection-end-line="212" selection-end-column="11" />
594
594
  </state>
595
595
  </provider>
596
596
  </entry>
@@ -61,7 +61,7 @@ module HttpCrawler
61
61
  attr_accessor :max_error_num
62
62
  # 最大错误重试次数
63
63
  def max_error_num
64
- @max_error_num ||= 1
64
+ @max_error_num ||= 3
65
65
  end
66
66
 
67
67
  attr_reader :uri
@@ -334,13 +334,7 @@ module HttpCrawler
334
334
  else
335
335
  # 每次错误次数尝试 -1
336
336
  n -= 1
337
- case error
338
- when HTTP::TimeoutError
339
- # 超时错误切换代理
340
- raise error unless self.update_proxy?
341
- else
342
- raise error unless self.update_proxy?
343
- end
337
+ self.update_proxy?
344
338
  retry
345
339
  end
346
340
  end
@@ -8,9 +8,7 @@ module HTTP
8
8
  # 数据解压
9
9
  case self.headers['Content-Encoding']
10
10
  when 'gzip' then
11
- sio = StringIO.new(self.body.to_s)
12
- gz = Zlib::GzipReader.new(sio)
13
- @decoding_body = gz.read()
11
+ @decoding_body = Zlib::GzipReader.new(StringIO.new(self.body.to_s), encoding: "ASCII-8BIT").read
14
12
  when 'br'
15
13
  @decoding_body = Brotli.inflate(self.body.to_s)
16
14
  # when 'deflate'
@@ -35,12 +33,12 @@ module HTTP
35
33
 
36
34
  # 进行转码
37
35
  begin
38
- @decoding_body.force_encoding(encoding).encode!('utf-8') if encoding && encoding != @decoding_body.encoding
36
+ @decoding_body.force_encoding(encoding).encode!('utf-8',invalid: :replace) if encoding && encoding != @decoding_body.encoding
39
37
  rescue => e
40
38
  # 转码错误后再次使用 CharDet 判断编码格式后进行转码
41
39
  cd = CharDet.detect(@decoding_body)["encoding"]
42
40
  if (cd && cd != encoding)
43
- @decoding_body.force_encoding(cd).encode!('utf-8') if encoding != @decoding_body.encoding
41
+ @decoding_body.force_encoding(cd).encode!('utf-8',invalid: :replace) if encoding != @decoding_body.encoding
44
42
  else
45
43
  # 还是转码错误则抛出源码转字符串内容
46
44
  self.body.to_s
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.27"
2
+ VERSION = "0.3.1.28"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.27
4
+ version: 0.3.1.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2019-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec