http_crawler 0.3.1.22 → 0.3.1.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/workspace.xml +57 -64
- data/lib/http_crawler/client.rb +11 -4
- data/lib/http_crawler/common/string.rb +14 -1
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f75d7cf5c15b83168a5a07fa0204b812aa3f57e4bddeefc536ccf929060572e
|
4
|
+
data.tar.gz: 61f8faf6c2ad2e14dd71c63a3e0ed8b15d981622569792d15fd7293da7432a3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c170aa8c9e38bc783856ee1278a19f99f9e9451a3ba6e23c5a7ed33df58b77b61c7767e68e17f347a61b9e3b779746a25c240115d1b8f50bde1bcb0425637c2
|
7
|
+
data.tar.gz: 4ea0e3e94a0abbffdd1b3e89859d4e37c01ac1a8645f6a39919e923a59094c7c0e809a11efde45828b2ca230d4c3efc67ba79e1998e9c3b7ed9a850b918ec847
|
data/.idea/workspace.xml
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
<project version="4">
|
3
3
|
<component name="ChangeListManager">
|
4
4
|
<list default="true" id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="">
|
5
|
-
<change beforePath="$PROJECT_DIR
|
6
|
-
<change beforePath="$PROJECT_DIR$/lib/http_crawler/
|
5
|
+
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
6
|
+
<change beforePath="$PROJECT_DIR$/lib/http_crawler/client.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/client.rb" afterDir="false" />
|
7
|
+
<change beforePath="$PROJECT_DIR$/lib/http_crawler/common/string.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/common/string.rb" afterDir="false" />
|
7
8
|
<change beforePath="$PROJECT_DIR$/lib/http_crawler/version.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/version.rb" afterDir="false" />
|
8
9
|
</list>
|
9
10
|
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
@@ -19,7 +20,7 @@
|
|
19
20
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
20
21
|
<provider selected="true" editor-type-id="text-editor">
|
21
22
|
<state relative-caret-position="18">
|
22
|
-
<caret line="1" column="63"
|
23
|
+
<caret line="1" column="63" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
|
23
24
|
</state>
|
24
25
|
</provider>
|
25
26
|
</entry>
|
@@ -35,7 +36,11 @@
|
|
35
36
|
</file>
|
36
37
|
<file leaf-file-name="response.rb" pinned="false" current-in-tab="false">
|
37
38
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
|
38
|
-
<provider selected="true" editor-type-id="text-editor"
|
39
|
+
<provider selected="true" editor-type-id="text-editor">
|
40
|
+
<state relative-caret-position="2250">
|
41
|
+
<caret line="125" column="6" lean-forward="true" selection-start-line="125" selection-start-column="6" selection-end-line="125" selection-end-column="26" />
|
42
|
+
</state>
|
43
|
+
</provider>
|
39
44
|
</entry>
|
40
45
|
</file>
|
41
46
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
@@ -76,7 +81,11 @@
|
|
76
81
|
</file>
|
77
82
|
<file leaf-file-name="string.rb" pinned="false" current-in-tab="false">
|
78
83
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
|
79
|
-
<provider selected="true" editor-type-id="text-editor"
|
84
|
+
<provider selected="true" editor-type-id="text-editor">
|
85
|
+
<state relative-caret-position="342">
|
86
|
+
<caret line="19" column="5" lean-forward="true" selection-start-line="19" selection-start-column="5" selection-end-line="19" selection-end-column="5" />
|
87
|
+
</state>
|
88
|
+
</provider>
|
80
89
|
</entry>
|
81
90
|
</file>
|
82
91
|
<file leaf-file-name="object.rb" pinned="false" current-in-tab="false">
|
@@ -87,8 +96,8 @@
|
|
87
96
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
88
97
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
89
98
|
<provider selected="true" editor-type-id="text-editor">
|
90
|
-
<state relative-caret-position="
|
91
|
-
<caret line="
|
99
|
+
<state relative-caret-position="374">
|
100
|
+
<caret line="278" column="75" lean-forward="true" selection-start-line="278" selection-start-column="75" selection-end-line="278" selection-end-column="75" />
|
92
101
|
</state>
|
93
102
|
</provider>
|
94
103
|
</entry>
|
@@ -131,7 +140,6 @@
|
|
131
140
|
<option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
|
132
141
|
<option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
|
133
142
|
<option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
|
134
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
|
135
143
|
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb" />
|
136
144
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
|
137
145
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
|
@@ -143,6 +151,8 @@
|
|
143
151
|
<option value="$PROJECT_DIR$/lib/http_crawler/decryption/node.rb" />
|
144
152
|
<option value="$PROJECT_DIR$/lib/http_crawler.rb" />
|
145
153
|
<option value="$PROJECT_DIR$/lib/http_crawler/decryption.rb" />
|
154
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/common/string.rb" />
|
155
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
|
146
156
|
<option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
|
147
157
|
</list>
|
148
158
|
</option>
|
@@ -166,7 +176,6 @@
|
|
166
176
|
<foldersAlwaysOnTop value="true" />
|
167
177
|
</navigator>
|
168
178
|
<panes>
|
169
|
-
<pane id="Scope" />
|
170
179
|
<pane id="ProjectPane">
|
171
180
|
<subPane>
|
172
181
|
<expand>
|
@@ -185,39 +194,11 @@
|
|
185
194
|
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
186
195
|
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
187
196
|
</path>
|
188
|
-
<path>
|
189
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
190
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
191
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
192
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
193
|
-
<item name="decryption" type="462c0819:PsiDirectoryNode" />
|
194
|
-
</path>
|
195
|
-
<path>
|
196
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
197
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
198
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
199
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
200
|
-
<item name="http" type="462c0819:PsiDirectoryNode" />
|
201
|
-
</path>
|
202
|
-
<path>
|
203
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
204
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
205
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
206
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
207
|
-
<item name="web" type="462c0819:PsiDirectoryNode" />
|
208
|
-
</path>
|
209
|
-
<path>
|
210
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
211
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
212
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
213
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
214
|
-
<item name="web" type="462c0819:PsiDirectoryNode" />
|
215
|
-
<item name="baidu" type="462c0819:PsiDirectoryNode" />
|
216
|
-
</path>
|
217
197
|
</expand>
|
218
198
|
<select />
|
219
199
|
</subPane>
|
220
200
|
</pane>
|
201
|
+
<pane id="Scope" />
|
221
202
|
</panes>
|
222
203
|
</component>
|
223
204
|
<component name="PropertiesComponent">
|
@@ -268,18 +249,22 @@
|
|
268
249
|
<workItem from="1550132724592" duration="3006000" />
|
269
250
|
<workItem from="1550208979012" duration="304000" />
|
270
251
|
<workItem from="1556176614736" duration="951000" />
|
271
|
-
<workItem from="1556985453279" duration="
|
252
|
+
<workItem from="1556985453279" duration="1656000" />
|
253
|
+
<workItem from="1557136966397" duration="459000" />
|
254
|
+
<workItem from="1557137463254" duration="382000" />
|
255
|
+
<workItem from="1557156104186" duration="1815000" />
|
256
|
+
<workItem from="1557160216202" duration="7000" />
|
272
257
|
</task>
|
273
258
|
<servers />
|
274
259
|
</component>
|
275
260
|
<component name="TimeTrackingManager">
|
276
|
-
<option name="totallyTimeSpent" value="
|
261
|
+
<option name="totallyTimeSpent" value="32897000" />
|
277
262
|
</component>
|
278
263
|
<component name="ToolWindowManager">
|
279
264
|
<frame x="0" y="0" width="1680" height="1050" extended-state="0" />
|
280
265
|
<editor active="true" />
|
281
266
|
<layout>
|
282
|
-
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.
|
267
|
+
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.16422467" />
|
283
268
|
<window_info anchor="bottom" id="TODO" order="6" />
|
284
269
|
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
|
285
270
|
<window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
|
@@ -288,17 +273,17 @@
|
|
288
273
|
<window_info anchor="bottom" id="Run" order="2" />
|
289
274
|
<window_info anchor="bottom" id="Version Control" order="7" />
|
290
275
|
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
291
|
-
<window_info anchor="bottom" id="Terminal" order="7" weight="0.
|
276
|
+
<window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.22871795" />
|
292
277
|
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
293
278
|
<window_info id="Favorites" order="2" side_tool="true" />
|
294
279
|
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
295
280
|
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
296
281
|
<window_info anchor="right" id="Commander" order="0" weight="0.4" />
|
297
282
|
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
283
|
+
<window_info anchor="bottom" id="Messages" order="7" weight="0.22974358" />
|
298
284
|
<window_info anchor="bottom" id="Message" order="0" />
|
299
285
|
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
|
300
286
|
<window_info anchor="bottom" id="Find" order="1" />
|
301
|
-
<window_info anchor="bottom" id="Messages" order="7" weight="0.22974358" />
|
302
287
|
</layout>
|
303
288
|
</component>
|
304
289
|
<component name="TypeScriptGeneratedFilesManager">
|
@@ -543,15 +528,6 @@
|
|
543
528
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/integer.rb">
|
544
529
|
<provider selected="true" editor-type-id="text-editor" />
|
545
530
|
</entry>
|
546
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
|
547
|
-
<provider selected="true" editor-type-id="text-editor" />
|
548
|
-
</entry>
|
549
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
|
550
|
-
<provider selected="true" editor-type-id="text-editor" />
|
551
|
-
</entry>
|
552
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
|
553
|
-
<provider selected="true" editor-type-id="text-editor" />
|
554
|
-
</entry>
|
555
531
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
556
532
|
<provider selected="true" editor-type-id="text-editor">
|
557
533
|
<state relative-caret-position="180">
|
@@ -566,31 +542,48 @@
|
|
566
542
|
</state>
|
567
543
|
</provider>
|
568
544
|
</entry>
|
569
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
545
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/decryption.rb">
|
570
546
|
<provider selected="true" editor-type-id="text-editor">
|
571
|
-
<state relative-caret-position="
|
572
|
-
<caret line="
|
547
|
+
<state relative-caret-position="90">
|
548
|
+
<caret line="5" column="54" lean-forward="true" selection-start-line="5" selection-start-column="54" selection-end-line="5" selection-end-column="54" />
|
573
549
|
</state>
|
574
550
|
</provider>
|
575
551
|
</entry>
|
576
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
552
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
577
553
|
<provider selected="true" editor-type-id="text-editor">
|
578
|
-
<state relative-caret-position="
|
579
|
-
<caret line="
|
554
|
+
<state relative-caret-position="18">
|
555
|
+
<caret line="1" column="63" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
|
580
556
|
</state>
|
581
557
|
</provider>
|
582
558
|
</entry>
|
583
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
559
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
|
560
|
+
<provider selected="true" editor-type-id="text-editor" />
|
561
|
+
</entry>
|
562
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
|
584
563
|
<provider selected="true" editor-type-id="text-editor">
|
585
|
-
<state relative-caret-position="
|
586
|
-
<caret line="
|
564
|
+
<state relative-caret-position="342">
|
565
|
+
<caret line="19" column="5" lean-forward="true" selection-start-line="19" selection-start-column="5" selection-end-line="19" selection-end-column="5" />
|
587
566
|
</state>
|
588
567
|
</provider>
|
589
568
|
</entry>
|
590
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
569
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
|
591
570
|
<provider selected="true" editor-type-id="text-editor">
|
592
|
-
<state relative-caret-position="
|
593
|
-
<caret line="
|
571
|
+
<state relative-caret-position="2250">
|
572
|
+
<caret line="125" column="6" lean-forward="true" selection-start-line="125" selection-start-column="6" selection-end-line="125" selection-end-column="26" />
|
573
|
+
</state>
|
574
|
+
</provider>
|
575
|
+
</entry>
|
576
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
577
|
+
<provider selected="true" editor-type-id="text-editor">
|
578
|
+
<state relative-caret-position="374">
|
579
|
+
<caret line="278" column="75" lean-forward="true" selection-start-line="278" selection-start-column="75" selection-end-line="278" selection-end-column="75" />
|
580
|
+
</state>
|
581
|
+
</provider>
|
582
|
+
</entry>
|
583
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
584
|
+
<provider selected="true" editor-type-id="text-editor">
|
585
|
+
<state relative-caret-position="414">
|
586
|
+
<caret line="23" column="46" selection-start-line="23" selection-start-column="46" selection-end-line="23" selection-end-column="46" />
|
594
587
|
</state>
|
595
588
|
</provider>
|
596
589
|
</entry>
|
data/lib/http_crawler/client.rb
CHANGED
@@ -34,7 +34,7 @@ module HttpCrawler
|
|
34
34
|
def initialize(parameter = {})
|
35
35
|
parameter = parameter.symbolize_keys
|
36
36
|
|
37
|
-
parameter[:uri_or_path] = parameter[:url]||parameter[:uri]
|
37
|
+
parameter[:uri_or_path] = parameter[:url] || parameter[:uri]
|
38
38
|
|
39
39
|
if parameter[:uri_or_path]
|
40
40
|
# 如果自定义uri
|
@@ -55,7 +55,7 @@ module HttpCrawler
|
|
55
55
|
init_client
|
56
56
|
|
57
57
|
# 初始化 代理参数
|
58
|
-
@proxy_params = {key: "#{self.class.to_s.gsub(":","_")}"}
|
58
|
+
@proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
|
59
59
|
end
|
60
60
|
|
61
61
|
attr_accessor :max_error_num
|
@@ -270,9 +270,16 @@ module HttpCrawler
|
|
270
270
|
|
271
271
|
|
272
272
|
# 发送 get 请求
|
273
|
-
def get(path, params = {})
|
273
|
+
def get(path, params = {}, limit = 3)
|
274
274
|
raise "Client uri为空" unless self.uri
|
275
|
-
request
|
275
|
+
request do
|
276
|
+
r = http.get((self.uri + path).to_s, :params => params, :ssl_context => @ctx)
|
277
|
+
return r if limit < 0
|
278
|
+
r.html.at_xpath("//meta[@http-equiv='Refresh']").jagger_blank do |objc|
|
279
|
+
r = self.get(objc.to_html[/(?:URL|url)="?(.*)[^";>]/, 1], params, limit - 1)
|
280
|
+
end
|
281
|
+
r
|
282
|
+
end
|
276
283
|
end
|
277
284
|
|
278
285
|
# 直接发送uri的get请求
|
@@ -3,7 +3,20 @@ class String
|
|
3
3
|
# 清除包含: 空格,回车
|
4
4
|
#
|
5
5
|
def jagger_del_inter
|
6
|
-
self.gsub(/(?:\n|\t|\r| | )/, "")
|
6
|
+
self.gsub(/(?:\n|\t|\r| | | |)/, "")
|
7
|
+
end
|
8
|
+
|
9
|
+
# 创建时间: 2019/5/6 18:11
|
10
|
+
# 更新时间: 2019/5/6
|
11
|
+
# 作者: Jagger
|
12
|
+
# 方法名称: jagger_to_array
|
13
|
+
# 方法说明: 字符串分割成数组
|
14
|
+
# 调用方式: #jagger_to_array
|
15
|
+
#
|
16
|
+
# @return Array
|
17
|
+
#
|
18
|
+
def jagger_to_array
|
19
|
+
self.split(/(?:\n|\t|\r| | | )+/)
|
7
20
|
end
|
8
21
|
|
9
22
|
# 转换成时间格式
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.
|
4
|
+
version: 0.3.1.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-05-
|
11
|
+
date: 2019-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|