http_crawler 0.3.1.22 → 0.3.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.idea/workspace.xml +57 -64
- data/lib/http_crawler/client.rb +11 -4
- data/lib/http_crawler/common/string.rb +14 -1
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2f75d7cf5c15b83168a5a07fa0204b812aa3f57e4bddeefc536ccf929060572e
|
|
4
|
+
data.tar.gz: 61f8faf6c2ad2e14dd71c63a3e0ed8b15d981622569792d15fd7293da7432a3d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0c170aa8c9e38bc783856ee1278a19f99f9e9451a3ba6e23c5a7ed33df58b77b61c7767e68e17f347a61b9e3b779746a25c240115d1b8f50bde1bcb0425637c2
|
|
7
|
+
data.tar.gz: 4ea0e3e94a0abbffdd1b3e89859d4e37c01ac1a8645f6a39919e923a59094c7c0e809a11efde45828b2ca230d4c3efc67ba79e1998e9c3b7ed9a850b918ec847
|
data/.idea/workspace.xml
CHANGED
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
<project version="4">
|
|
3
3
|
<component name="ChangeListManager">
|
|
4
4
|
<list default="true" id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="">
|
|
5
|
-
<change beforePath="$PROJECT_DIR
|
|
6
|
-
<change beforePath="$PROJECT_DIR$/lib/http_crawler/
|
|
5
|
+
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
|
6
|
+
<change beforePath="$PROJECT_DIR$/lib/http_crawler/client.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/client.rb" afterDir="false" />
|
|
7
|
+
<change beforePath="$PROJECT_DIR$/lib/http_crawler/common/string.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/common/string.rb" afterDir="false" />
|
|
7
8
|
<change beforePath="$PROJECT_DIR$/lib/http_crawler/version.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/version.rb" afterDir="false" />
|
|
8
9
|
</list>
|
|
9
10
|
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
|
@@ -19,7 +20,7 @@
|
|
|
19
20
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
|
20
21
|
<provider selected="true" editor-type-id="text-editor">
|
|
21
22
|
<state relative-caret-position="18">
|
|
22
|
-
<caret line="1" column="63"
|
|
23
|
+
<caret line="1" column="63" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
|
|
23
24
|
</state>
|
|
24
25
|
</provider>
|
|
25
26
|
</entry>
|
|
@@ -35,7 +36,11 @@
|
|
|
35
36
|
</file>
|
|
36
37
|
<file leaf-file-name="response.rb" pinned="false" current-in-tab="false">
|
|
37
38
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
|
|
38
|
-
<provider selected="true" editor-type-id="text-editor"
|
|
39
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
40
|
+
<state relative-caret-position="2250">
|
|
41
|
+
<caret line="125" column="6" lean-forward="true" selection-start-line="125" selection-start-column="6" selection-end-line="125" selection-end-column="26" />
|
|
42
|
+
</state>
|
|
43
|
+
</provider>
|
|
39
44
|
</entry>
|
|
40
45
|
</file>
|
|
41
46
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
|
@@ -76,7 +81,11 @@
|
|
|
76
81
|
</file>
|
|
77
82
|
<file leaf-file-name="string.rb" pinned="false" current-in-tab="false">
|
|
78
83
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
|
|
79
|
-
<provider selected="true" editor-type-id="text-editor"
|
|
84
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
85
|
+
<state relative-caret-position="342">
|
|
86
|
+
<caret line="19" column="5" lean-forward="true" selection-start-line="19" selection-start-column="5" selection-end-line="19" selection-end-column="5" />
|
|
87
|
+
</state>
|
|
88
|
+
</provider>
|
|
80
89
|
</entry>
|
|
81
90
|
</file>
|
|
82
91
|
<file leaf-file-name="object.rb" pinned="false" current-in-tab="false">
|
|
@@ -87,8 +96,8 @@
|
|
|
87
96
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
|
88
97
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
|
89
98
|
<provider selected="true" editor-type-id="text-editor">
|
|
90
|
-
<state relative-caret-position="
|
|
91
|
-
<caret line="
|
|
99
|
+
<state relative-caret-position="374">
|
|
100
|
+
<caret line="278" column="75" lean-forward="true" selection-start-line="278" selection-start-column="75" selection-end-line="278" selection-end-column="75" />
|
|
92
101
|
</state>
|
|
93
102
|
</provider>
|
|
94
103
|
</entry>
|
|
@@ -131,7 +140,6 @@
|
|
|
131
140
|
<option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
|
|
132
141
|
<option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
|
|
133
142
|
<option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
|
|
134
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
|
|
135
143
|
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb" />
|
|
136
144
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
|
|
137
145
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
|
|
@@ -143,6 +151,8 @@
|
|
|
143
151
|
<option value="$PROJECT_DIR$/lib/http_crawler/decryption/node.rb" />
|
|
144
152
|
<option value="$PROJECT_DIR$/lib/http_crawler.rb" />
|
|
145
153
|
<option value="$PROJECT_DIR$/lib/http_crawler/decryption.rb" />
|
|
154
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/common/string.rb" />
|
|
155
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
|
|
146
156
|
<option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
|
|
147
157
|
</list>
|
|
148
158
|
</option>
|
|
@@ -166,7 +176,6 @@
|
|
|
166
176
|
<foldersAlwaysOnTop value="true" />
|
|
167
177
|
</navigator>
|
|
168
178
|
<panes>
|
|
169
|
-
<pane id="Scope" />
|
|
170
179
|
<pane id="ProjectPane">
|
|
171
180
|
<subPane>
|
|
172
181
|
<expand>
|
|
@@ -185,39 +194,11 @@
|
|
|
185
194
|
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
|
186
195
|
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
187
196
|
</path>
|
|
188
|
-
<path>
|
|
189
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
|
190
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
191
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
|
192
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
193
|
-
<item name="decryption" type="462c0819:PsiDirectoryNode" />
|
|
194
|
-
</path>
|
|
195
|
-
<path>
|
|
196
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
|
197
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
198
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
|
199
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
200
|
-
<item name="http" type="462c0819:PsiDirectoryNode" />
|
|
201
|
-
</path>
|
|
202
|
-
<path>
|
|
203
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
|
204
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
205
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
|
206
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
207
|
-
<item name="web" type="462c0819:PsiDirectoryNode" />
|
|
208
|
-
</path>
|
|
209
|
-
<path>
|
|
210
|
-
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
|
211
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
212
|
-
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
|
213
|
-
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
|
214
|
-
<item name="web" type="462c0819:PsiDirectoryNode" />
|
|
215
|
-
<item name="baidu" type="462c0819:PsiDirectoryNode" />
|
|
216
|
-
</path>
|
|
217
197
|
</expand>
|
|
218
198
|
<select />
|
|
219
199
|
</subPane>
|
|
220
200
|
</pane>
|
|
201
|
+
<pane id="Scope" />
|
|
221
202
|
</panes>
|
|
222
203
|
</component>
|
|
223
204
|
<component name="PropertiesComponent">
|
|
@@ -268,18 +249,22 @@
|
|
|
268
249
|
<workItem from="1550132724592" duration="3006000" />
|
|
269
250
|
<workItem from="1550208979012" duration="304000" />
|
|
270
251
|
<workItem from="1556176614736" duration="951000" />
|
|
271
|
-
<workItem from="1556985453279" duration="
|
|
252
|
+
<workItem from="1556985453279" duration="1656000" />
|
|
253
|
+
<workItem from="1557136966397" duration="459000" />
|
|
254
|
+
<workItem from="1557137463254" duration="382000" />
|
|
255
|
+
<workItem from="1557156104186" duration="1815000" />
|
|
256
|
+
<workItem from="1557160216202" duration="7000" />
|
|
272
257
|
</task>
|
|
273
258
|
<servers />
|
|
274
259
|
</component>
|
|
275
260
|
<component name="TimeTrackingManager">
|
|
276
|
-
<option name="totallyTimeSpent" value="
|
|
261
|
+
<option name="totallyTimeSpent" value="32897000" />
|
|
277
262
|
</component>
|
|
278
263
|
<component name="ToolWindowManager">
|
|
279
264
|
<frame x="0" y="0" width="1680" height="1050" extended-state="0" />
|
|
280
265
|
<editor active="true" />
|
|
281
266
|
<layout>
|
|
282
|
-
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.
|
|
267
|
+
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.16422467" />
|
|
283
268
|
<window_info anchor="bottom" id="TODO" order="6" />
|
|
284
269
|
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
|
|
285
270
|
<window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
|
|
@@ -288,17 +273,17 @@
|
|
|
288
273
|
<window_info anchor="bottom" id="Run" order="2" />
|
|
289
274
|
<window_info anchor="bottom" id="Version Control" order="7" />
|
|
290
275
|
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
|
291
|
-
<window_info anchor="bottom" id="Terminal" order="7" weight="0.
|
|
276
|
+
<window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.22871795" />
|
|
292
277
|
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
|
293
278
|
<window_info id="Favorites" order="2" side_tool="true" />
|
|
294
279
|
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
|
295
280
|
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
|
296
281
|
<window_info anchor="right" id="Commander" order="0" weight="0.4" />
|
|
297
282
|
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
|
283
|
+
<window_info anchor="bottom" id="Messages" order="7" weight="0.22974358" />
|
|
298
284
|
<window_info anchor="bottom" id="Message" order="0" />
|
|
299
285
|
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
|
|
300
286
|
<window_info anchor="bottom" id="Find" order="1" />
|
|
301
|
-
<window_info anchor="bottom" id="Messages" order="7" weight="0.22974358" />
|
|
302
287
|
</layout>
|
|
303
288
|
</component>
|
|
304
289
|
<component name="TypeScriptGeneratedFilesManager">
|
|
@@ -543,15 +528,6 @@
|
|
|
543
528
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/integer.rb">
|
|
544
529
|
<provider selected="true" editor-type-id="text-editor" />
|
|
545
530
|
</entry>
|
|
546
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
|
|
547
|
-
<provider selected="true" editor-type-id="text-editor" />
|
|
548
|
-
</entry>
|
|
549
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
|
|
550
|
-
<provider selected="true" editor-type-id="text-editor" />
|
|
551
|
-
</entry>
|
|
552
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
|
|
553
|
-
<provider selected="true" editor-type-id="text-editor" />
|
|
554
|
-
</entry>
|
|
555
531
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
|
556
532
|
<provider selected="true" editor-type-id="text-editor">
|
|
557
533
|
<state relative-caret-position="180">
|
|
@@ -566,31 +542,48 @@
|
|
|
566
542
|
</state>
|
|
567
543
|
</provider>
|
|
568
544
|
</entry>
|
|
569
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
545
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/decryption.rb">
|
|
570
546
|
<provider selected="true" editor-type-id="text-editor">
|
|
571
|
-
<state relative-caret-position="
|
|
572
|
-
<caret line="
|
|
547
|
+
<state relative-caret-position="90">
|
|
548
|
+
<caret line="5" column="54" lean-forward="true" selection-start-line="5" selection-start-column="54" selection-end-line="5" selection-end-column="54" />
|
|
573
549
|
</state>
|
|
574
550
|
</provider>
|
|
575
551
|
</entry>
|
|
576
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
|
552
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
|
577
553
|
<provider selected="true" editor-type-id="text-editor">
|
|
578
|
-
<state relative-caret-position="
|
|
579
|
-
<caret line="
|
|
554
|
+
<state relative-caret-position="18">
|
|
555
|
+
<caret line="1" column="63" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
|
|
580
556
|
</state>
|
|
581
557
|
</provider>
|
|
582
558
|
</entry>
|
|
583
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
|
559
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
|
|
560
|
+
<provider selected="true" editor-type-id="text-editor" />
|
|
561
|
+
</entry>
|
|
562
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
|
|
584
563
|
<provider selected="true" editor-type-id="text-editor">
|
|
585
|
-
<state relative-caret-position="
|
|
586
|
-
<caret line="
|
|
564
|
+
<state relative-caret-position="342">
|
|
565
|
+
<caret line="19" column="5" lean-forward="true" selection-start-line="19" selection-start-column="5" selection-end-line="19" selection-end-column="5" />
|
|
587
566
|
</state>
|
|
588
567
|
</provider>
|
|
589
568
|
</entry>
|
|
590
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
569
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
|
|
591
570
|
<provider selected="true" editor-type-id="text-editor">
|
|
592
|
-
<state relative-caret-position="
|
|
593
|
-
<caret line="
|
|
571
|
+
<state relative-caret-position="2250">
|
|
572
|
+
<caret line="125" column="6" lean-forward="true" selection-start-line="125" selection-start-column="6" selection-end-line="125" selection-end-column="26" />
|
|
573
|
+
</state>
|
|
574
|
+
</provider>
|
|
575
|
+
</entry>
|
|
576
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
|
577
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
578
|
+
<state relative-caret-position="374">
|
|
579
|
+
<caret line="278" column="75" lean-forward="true" selection-start-line="278" selection-start-column="75" selection-end-line="278" selection-end-column="75" />
|
|
580
|
+
</state>
|
|
581
|
+
</provider>
|
|
582
|
+
</entry>
|
|
583
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
|
584
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
585
|
+
<state relative-caret-position="414">
|
|
586
|
+
<caret line="23" column="46" selection-start-line="23" selection-start-column="46" selection-end-line="23" selection-end-column="46" />
|
|
594
587
|
</state>
|
|
595
588
|
</provider>
|
|
596
589
|
</entry>
|
data/lib/http_crawler/client.rb
CHANGED
|
@@ -34,7 +34,7 @@ module HttpCrawler
|
|
|
34
34
|
def initialize(parameter = {})
|
|
35
35
|
parameter = parameter.symbolize_keys
|
|
36
36
|
|
|
37
|
-
parameter[:uri_or_path] = parameter[:url]||parameter[:uri]
|
|
37
|
+
parameter[:uri_or_path] = parameter[:url] || parameter[:uri]
|
|
38
38
|
|
|
39
39
|
if parameter[:uri_or_path]
|
|
40
40
|
# 如果自定义uri
|
|
@@ -55,7 +55,7 @@ module HttpCrawler
|
|
|
55
55
|
init_client
|
|
56
56
|
|
|
57
57
|
# 初始化 代理参数
|
|
58
|
-
@proxy_params = {key: "#{self.class.to_s.gsub(":","_")}"}
|
|
58
|
+
@proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
|
|
59
59
|
end
|
|
60
60
|
|
|
61
61
|
attr_accessor :max_error_num
|
|
@@ -270,9 +270,16 @@ module HttpCrawler
|
|
|
270
270
|
|
|
271
271
|
|
|
272
272
|
# 发送 get 请求
|
|
273
|
-
def get(path, params = {})
|
|
273
|
+
def get(path, params = {}, limit = 3)
|
|
274
274
|
raise "Client uri为空" unless self.uri
|
|
275
|
-
request
|
|
275
|
+
request do
|
|
276
|
+
r = http.get((self.uri + path).to_s, :params => params, :ssl_context => @ctx)
|
|
277
|
+
return r if limit < 0
|
|
278
|
+
r.html.at_xpath("//meta[@http-equiv='Refresh']").jagger_blank do |objc|
|
|
279
|
+
r = self.get(objc.to_html[/(?:URL|url)="?(.*)[^";>]/, 1], params, limit - 1)
|
|
280
|
+
end
|
|
281
|
+
r
|
|
282
|
+
end
|
|
276
283
|
end
|
|
277
284
|
|
|
278
285
|
# 直接发送uri的get请求
|
|
@@ -3,7 +3,20 @@ class String
|
|
|
3
3
|
# 清除包含: 空格,回车
|
|
4
4
|
#
|
|
5
5
|
def jagger_del_inter
|
|
6
|
-
self.gsub(/(?:\n|\t|\r| | )/, "")
|
|
6
|
+
self.gsub(/(?:\n|\t|\r| | | |)/, "")
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
# 创建时间: 2019/5/6 18:11
|
|
10
|
+
# 更新时间: 2019/5/6
|
|
11
|
+
# 作者: Jagger
|
|
12
|
+
# 方法名称: jagger_to_array
|
|
13
|
+
# 方法说明: 字符串分割成数组
|
|
14
|
+
# 调用方式: #jagger_to_array
|
|
15
|
+
#
|
|
16
|
+
# @return Array
|
|
17
|
+
#
|
|
18
|
+
def jagger_to_array
|
|
19
|
+
self.split(/(?:\n|\t|\r| | | )+/)
|
|
7
20
|
end
|
|
8
21
|
|
|
9
22
|
# 转换成时间格式
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: http_crawler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.1.
|
|
4
|
+
version: 0.3.1.23
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- jagger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-05-
|
|
11
|
+
date: 2019-05-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|