http_crawler 0.3.1.27 → 0.3.1.28
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/workspace.xml +14 -14
- data/lib/http_crawler/client.rb +2 -8
- data/lib/http_crawler/http/response.rb +3 -5
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 809654bd670ca7a1586478e45467c162d12ab5656d5ac32f0072beea5de967ee
|
4
|
+
data.tar.gz: 93c0efeeb3737db3184d3ca93d0bd9ced39ad13e52f8b73ae30b874786504d9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c9752c6d3ab7203fdb6058340dd049288217fb91a3cfa459055d369d522218a37d228a7ac47ee33f904f660dab6faeec7cba25f4a30123bd2327d5c30a8293d3
|
7
|
+
data.tar.gz: 8bf4fa8399ffe7fae9d7e6f256614ce23c9757772b165e319330078069dc76349cb5dd1dfffe9e44dd6ecf5ebf82572c49e4fc843314ed238cb95f8880f062f6
|
data/.idea/workspace.xml
CHANGED
@@ -95,8 +95,8 @@
|
|
95
95
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="true">
|
96
96
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
97
97
|
<provider selected="true" editor-type-id="text-editor">
|
98
|
-
<state relative-caret-position="
|
99
|
-
<caret line="
|
98
|
+
<state relative-caret-position="273">
|
99
|
+
<caret line="212" column="11" lean-forward="true" selection-start-line="212" selection-start-column="11" selection-end-line="212" selection-end-column="11" />
|
100
100
|
</state>
|
101
101
|
</provider>
|
102
102
|
</entry>
|
@@ -165,7 +165,7 @@
|
|
165
165
|
<component name="NodePackageJsonFileManager">
|
166
166
|
<packageJsonPaths />
|
167
167
|
</component>
|
168
|
-
<component name="ProjectFrameBounds" fullScreen="true">
|
168
|
+
<component name="ProjectFrameBounds" extendedState="6" fullScreen="true">
|
169
169
|
<option name="y" value="23" />
|
170
170
|
<option name="width" value="1280" />
|
171
171
|
<option name="height" value="777" />
|
@@ -175,6 +175,7 @@
|
|
175
175
|
<foldersAlwaysOnTop value="true" />
|
176
176
|
</navigator>
|
177
177
|
<panes>
|
178
|
+
<pane id="Scope" />
|
178
179
|
<pane id="ProjectPane">
|
179
180
|
<subPane>
|
180
181
|
<expand>
|
@@ -197,7 +198,6 @@
|
|
197
198
|
<select />
|
198
199
|
</subPane>
|
199
200
|
</pane>
|
200
|
-
<pane id="Scope" />
|
201
201
|
</panes>
|
202
202
|
</component>
|
203
203
|
<component name="PropertiesComponent">
|
@@ -253,29 +253,29 @@
|
|
253
253
|
<workItem from="1557137463254" duration="382000" />
|
254
254
|
<workItem from="1557156104186" duration="1815000" />
|
255
255
|
<workItem from="1557160216202" duration="138000" />
|
256
|
-
<workItem from="1563360666497" duration="
|
256
|
+
<workItem from="1563360666497" duration="431000" />
|
257
|
+
<workItem from="1563361538580" duration="6000" />
|
257
258
|
</task>
|
258
259
|
<servers />
|
259
260
|
</component>
|
260
261
|
<component name="TimeTrackingManager">
|
261
|
-
<option name="totallyTimeSpent" value="
|
262
|
+
<option name="totallyTimeSpent" value="33465000" />
|
262
263
|
</component>
|
263
264
|
<component name="ToolWindowManager">
|
264
|
-
<frame x="0" y="0" width="1680" height="1050" extended-state="
|
265
|
-
<editor active="true" />
|
265
|
+
<frame x="0" y="0" width="1680" height="1050" extended-state="6" />
|
266
266
|
<layout>
|
267
|
-
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.
|
267
|
+
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.0964591" />
|
268
268
|
<window_info anchor="bottom" id="TODO" order="6" />
|
269
269
|
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
|
270
270
|
<window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
|
271
271
|
<window_info anchor="right" id="Database" order="3" />
|
272
272
|
<window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
|
273
|
-
<window_info anchor="bottom" id="Run" order="2" />
|
274
273
|
<window_info anchor="bottom" id="Version Control" order="7" />
|
274
|
+
<window_info anchor="bottom" id="Run" order="2" />
|
275
275
|
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
276
|
-
<window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.
|
277
|
-
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
276
|
+
<window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.11794872" />
|
278
277
|
<window_info id="Favorites" order="2" side_tool="true" />
|
278
|
+
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
279
279
|
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
280
280
|
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
281
281
|
<window_info anchor="right" id="Commander" order="0" weight="0.4" />
|
@@ -589,8 +589,8 @@
|
|
589
589
|
</entry>
|
590
590
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
591
591
|
<provider selected="true" editor-type-id="text-editor">
|
592
|
-
<state relative-caret-position="
|
593
|
-
<caret line="
|
592
|
+
<state relative-caret-position="273">
|
593
|
+
<caret line="212" column="11" lean-forward="true" selection-start-line="212" selection-start-column="11" selection-end-line="212" selection-end-column="11" />
|
594
594
|
</state>
|
595
595
|
</provider>
|
596
596
|
</entry>
|
data/lib/http_crawler/client.rb
CHANGED
@@ -61,7 +61,7 @@ module HttpCrawler
|
|
61
61
|
attr_accessor :max_error_num
|
62
62
|
# 最大错误重试次数
|
63
63
|
def max_error_num
|
64
|
-
@max_error_num ||=
|
64
|
+
@max_error_num ||= 3
|
65
65
|
end
|
66
66
|
|
67
67
|
attr_reader :uri
|
@@ -334,13 +334,7 @@ module HttpCrawler
|
|
334
334
|
else
|
335
335
|
# 每次错误次数尝试 -1
|
336
336
|
n -= 1
|
337
|
-
|
338
|
-
when HTTP::TimeoutError
|
339
|
-
# 超时错误切换代理
|
340
|
-
raise error unless self.update_proxy?
|
341
|
-
else
|
342
|
-
raise error unless self.update_proxy?
|
343
|
-
end
|
337
|
+
self.update_proxy?
|
344
338
|
retry
|
345
339
|
end
|
346
340
|
end
|
@@ -8,9 +8,7 @@ module HTTP
|
|
8
8
|
# 数据解压
|
9
9
|
case self.headers['Content-Encoding']
|
10
10
|
when 'gzip' then
|
11
|
-
|
12
|
-
gz = Zlib::GzipReader.new(sio)
|
13
|
-
@decoding_body = gz.read()
|
11
|
+
@decoding_body = Zlib::GzipReader.new(StringIO.new(self.body.to_s), encoding: "ASCII-8BIT").read
|
14
12
|
when 'br'
|
15
13
|
@decoding_body = Brotli.inflate(self.body.to_s)
|
16
14
|
# when 'deflate'
|
@@ -35,12 +33,12 @@ module HTTP
|
|
35
33
|
|
36
34
|
# 进行转码
|
37
35
|
begin
|
38
|
-
@decoding_body.force_encoding(encoding).encode!('utf-8') if encoding && encoding != @decoding_body.encoding
|
36
|
+
@decoding_body.force_encoding(encoding).encode!('utf-8',invalid: :replace) if encoding && encoding != @decoding_body.encoding
|
39
37
|
rescue => e
|
40
38
|
# 转码错误后再次使用 CharDet 判断编码格式后进行转码
|
41
39
|
cd = CharDet.detect(@decoding_body)["encoding"]
|
42
40
|
if (cd && cd != encoding)
|
43
|
-
@decoding_body.force_encoding(cd).encode!('utf-8') if encoding != @decoding_body.encoding
|
41
|
+
@decoding_body.force_encoding(cd).encode!('utf-8',invalid: :replace) if encoding != @decoding_body.encoding
|
44
42
|
else
|
45
43
|
# 还是转码错误则抛出源码转字符串内容
|
46
44
|
self.body.to_s
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.
|
4
|
+
version: 0.3.1.28
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|