http_crawler 0.3.1.22 → 0.3.1.23

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ff27c531cab40b5af70cbdf5003aadd3ba8b344f83b3dc78b5eba72c45c3aad8
4
- data.tar.gz: 88b2b86dae1d14cb7500cd928a7f843efdff6c227fc70412f6a8b846c5e260b3
3
+ metadata.gz: 2f75d7cf5c15b83168a5a07fa0204b812aa3f57e4bddeefc536ccf929060572e
4
+ data.tar.gz: 61f8faf6c2ad2e14dd71c63a3e0ed8b15d981622569792d15fd7293da7432a3d
5
5
  SHA512:
6
- metadata.gz: bbe38263b22c262507a2b133403859ae5d2cbcf07079ab2d4825481bd4ebd3dc3b8431a67bb84c1051b0f8b0528bb6e481fbbe86462f6459d871e901d6d434ba
7
- data.tar.gz: 69ccb476477278c6bb73cae8a48367ecb9313e449884d95c1fb649fce33f69e3eae2e081004609ce4fba1dd1a6fa311d4a35757fe711f0c5fec1099c73f11566
6
+ metadata.gz: 0c170aa8c9e38bc783856ee1278a19f99f9e9451a3ba6e23c5a7ed33df58b77b61c7767e68e17f347a61b9e3b779746a25c240115d1b8f50bde1bcb0425637c2
7
+ data.tar.gz: 4ea0e3e94a0abbffdd1b3e89859d4e37c01ac1a8645f6a39919e923a59094c7c0e809a11efde45828b2ca230d4c3efc67ba79e1998e9c3b7ed9a850b918ec847
data/.idea/workspace.xml CHANGED
@@ -2,8 +2,9 @@
2
2
  <project version="4">
3
3
  <component name="ChangeListManager">
4
4
  <list default="true" id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="">
5
- <change beforePath="$PROJECT_DIR$/lib/http_crawler.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler.rb" afterDir="false" />
6
- <change beforePath="$PROJECT_DIR$/lib/http_crawler/decryption.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/decryption.rb" afterDir="false" />
5
+ <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
6
+ <change beforePath="$PROJECT_DIR$/lib/http_crawler/client.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/client.rb" afterDir="false" />
7
+ <change beforePath="$PROJECT_DIR$/lib/http_crawler/common/string.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/common/string.rb" afterDir="false" />
7
8
  <change beforePath="$PROJECT_DIR$/lib/http_crawler/version.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/version.rb" afterDir="false" />
8
9
  </list>
9
10
  <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
@@ -19,7 +20,7 @@
19
20
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
20
21
  <provider selected="true" editor-type-id="text-editor">
21
22
  <state relative-caret-position="18">
22
- <caret line="1" column="63" lean-forward="true" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
23
+ <caret line="1" column="63" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
23
24
  </state>
24
25
  </provider>
25
26
  </entry>
@@ -35,7 +36,11 @@
35
36
  </file>
36
37
  <file leaf-file-name="response.rb" pinned="false" current-in-tab="false">
37
38
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
38
- <provider selected="true" editor-type-id="text-editor" />
39
+ <provider selected="true" editor-type-id="text-editor">
40
+ <state relative-caret-position="2250">
41
+ <caret line="125" column="6" lean-forward="true" selection-start-line="125" selection-start-column="6" selection-end-line="125" selection-end-column="26" />
42
+ </state>
43
+ </provider>
39
44
  </entry>
40
45
  </file>
41
46
  <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
@@ -76,7 +81,11 @@
76
81
  </file>
77
82
  <file leaf-file-name="string.rb" pinned="false" current-in-tab="false">
78
83
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
79
- <provider selected="true" editor-type-id="text-editor" />
84
+ <provider selected="true" editor-type-id="text-editor">
85
+ <state relative-caret-position="342">
86
+ <caret line="19" column="5" lean-forward="true" selection-start-line="19" selection-start-column="5" selection-end-line="19" selection-end-column="5" />
87
+ </state>
88
+ </provider>
80
89
  </entry>
81
90
  </file>
82
91
  <file leaf-file-name="object.rb" pinned="false" current-in-tab="false">
@@ -87,8 +96,8 @@
87
96
  <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
88
97
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
89
98
  <provider selected="true" editor-type-id="text-editor">
90
- <state relative-caret-position="414">
91
- <caret line="23" selection-start-line="23" selection-end-line="23" />
99
+ <state relative-caret-position="374">
100
+ <caret line="278" column="75" lean-forward="true" selection-start-line="278" selection-start-column="75" selection-end-line="278" selection-end-column="75" />
92
101
  </state>
93
102
  </provider>
94
103
  </entry>
@@ -131,7 +140,6 @@
131
140
  <option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
132
141
  <option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
133
142
  <option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
134
- <option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
135
143
  <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb" />
136
144
  <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
137
145
  <option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
@@ -143,6 +151,8 @@
143
151
  <option value="$PROJECT_DIR$/lib/http_crawler/decryption/node.rb" />
144
152
  <option value="$PROJECT_DIR$/lib/http_crawler.rb" />
145
153
  <option value="$PROJECT_DIR$/lib/http_crawler/decryption.rb" />
154
+ <option value="$PROJECT_DIR$/lib/http_crawler/common/string.rb" />
155
+ <option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
146
156
  <option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
147
157
  </list>
148
158
  </option>
@@ -166,7 +176,6 @@
166
176
  <foldersAlwaysOnTop value="true" />
167
177
  </navigator>
168
178
  <panes>
169
- <pane id="Scope" />
170
179
  <pane id="ProjectPane">
171
180
  <subPane>
172
181
  <expand>
@@ -185,39 +194,11 @@
185
194
  <item name="lib" type="462c0819:PsiDirectoryNode" />
186
195
  <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
187
196
  </path>
188
- <path>
189
- <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
190
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
191
- <item name="lib" type="462c0819:PsiDirectoryNode" />
192
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
193
- <item name="decryption" type="462c0819:PsiDirectoryNode" />
194
- </path>
195
- <path>
196
- <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
197
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
198
- <item name="lib" type="462c0819:PsiDirectoryNode" />
199
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
200
- <item name="http" type="462c0819:PsiDirectoryNode" />
201
- </path>
202
- <path>
203
- <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
204
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
205
- <item name="lib" type="462c0819:PsiDirectoryNode" />
206
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
207
- <item name="web" type="462c0819:PsiDirectoryNode" />
208
- </path>
209
- <path>
210
- <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
211
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
212
- <item name="lib" type="462c0819:PsiDirectoryNode" />
213
- <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
214
- <item name="web" type="462c0819:PsiDirectoryNode" />
215
- <item name="baidu" type="462c0819:PsiDirectoryNode" />
216
- </path>
217
197
  </expand>
218
198
  <select />
219
199
  </subPane>
220
200
  </pane>
201
+ <pane id="Scope" />
221
202
  </panes>
222
203
  </component>
223
204
  <component name="PropertiesComponent">
@@ -268,18 +249,22 @@
268
249
  <workItem from="1550132724592" duration="3006000" />
269
250
  <workItem from="1550208979012" duration="304000" />
270
251
  <workItem from="1556176614736" duration="951000" />
271
- <workItem from="1556985453279" duration="414000" />
252
+ <workItem from="1556985453279" duration="1656000" />
253
+ <workItem from="1557136966397" duration="459000" />
254
+ <workItem from="1557137463254" duration="382000" />
255
+ <workItem from="1557156104186" duration="1815000" />
256
+ <workItem from="1557160216202" duration="7000" />
272
257
  </task>
273
258
  <servers />
274
259
  </component>
275
260
  <component name="TimeTrackingManager">
276
- <option name="totallyTimeSpent" value="28992000" />
261
+ <option name="totallyTimeSpent" value="32897000" />
277
262
  </component>
278
263
  <component name="ToolWindowManager">
279
264
  <frame x="0" y="0" width="1680" height="1050" extended-state="0" />
280
265
  <editor active="true" />
281
266
  <layout>
282
- <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.2722833" />
267
+ <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.16422467" />
283
268
  <window_info anchor="bottom" id="TODO" order="6" />
284
269
  <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
285
270
  <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
@@ -288,17 +273,17 @@
288
273
  <window_info anchor="bottom" id="Run" order="2" />
289
274
  <window_info anchor="bottom" id="Version Control" order="7" />
290
275
  <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
291
- <window_info anchor="bottom" id="Terminal" order="7" weight="0.45725647" />
276
+ <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.22871795" />
292
277
  <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
293
278
  <window_info id="Favorites" order="2" side_tool="true" />
294
279
  <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
295
280
  <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
296
281
  <window_info anchor="right" id="Commander" order="0" weight="0.4" />
297
282
  <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
283
+ <window_info anchor="bottom" id="Messages" order="7" weight="0.22974358" />
298
284
  <window_info anchor="bottom" id="Message" order="0" />
299
285
  <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
300
286
  <window_info anchor="bottom" id="Find" order="1" />
301
- <window_info anchor="bottom" id="Messages" order="7" weight="0.22974358" />
302
287
  </layout>
303
288
  </component>
304
289
  <component name="TypeScriptGeneratedFilesManager">
@@ -543,15 +528,6 @@
543
528
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/integer.rb">
544
529
  <provider selected="true" editor-type-id="text-editor" />
545
530
  </entry>
546
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
547
- <provider selected="true" editor-type-id="text-editor" />
548
- </entry>
549
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
550
- <provider selected="true" editor-type-id="text-editor" />
551
- </entry>
552
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
553
- <provider selected="true" editor-type-id="text-editor" />
554
- </entry>
555
531
  <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
556
532
  <provider selected="true" editor-type-id="text-editor">
557
533
  <state relative-caret-position="180">
@@ -566,31 +542,48 @@
566
542
  </state>
567
543
  </provider>
568
544
  </entry>
569
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
545
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/decryption.rb">
570
546
  <provider selected="true" editor-type-id="text-editor">
571
- <state relative-caret-position="414">
572
- <caret line="23" selection-start-line="23" selection-end-line="23" />
547
+ <state relative-caret-position="90">
548
+ <caret line="5" column="54" lean-forward="true" selection-start-line="5" selection-start-column="54" selection-end-line="5" selection-end-column="54" />
573
549
  </state>
574
550
  </provider>
575
551
  </entry>
576
- <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
552
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
577
553
  <provider selected="true" editor-type-id="text-editor">
578
- <state relative-caret-position="414">
579
- <caret line="23" column="46" selection-start-line="23" selection-start-column="46" selection-end-line="23" selection-end-column="46" />
554
+ <state relative-caret-position="18">
555
+ <caret line="1" column="63" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
580
556
  </state>
581
557
  </provider>
582
558
  </entry>
583
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
559
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
560
+ <provider selected="true" editor-type-id="text-editor" />
561
+ </entry>
562
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
584
563
  <provider selected="true" editor-type-id="text-editor">
585
- <state relative-caret-position="18">
586
- <caret line="1" column="63" lean-forward="true" selection-start-line="1" selection-start-column="63" selection-end-line="2" selection-end-column="64" />
564
+ <state relative-caret-position="342">
565
+ <caret line="19" column="5" lean-forward="true" selection-start-line="19" selection-start-column="5" selection-end-line="19" selection-end-column="5" />
587
566
  </state>
588
567
  </provider>
589
568
  </entry>
590
- <entry file="file://$PROJECT_DIR$/lib/http_crawler/decryption.rb">
569
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/http/response.rb">
591
570
  <provider selected="true" editor-type-id="text-editor">
592
- <state relative-caret-position="90">
593
- <caret line="5" column="54" lean-forward="true" selection-start-line="5" selection-start-column="54" selection-end-line="5" selection-end-column="54" />
571
+ <state relative-caret-position="2250">
572
+ <caret line="125" column="6" lean-forward="true" selection-start-line="125" selection-start-column="6" selection-end-line="125" selection-end-column="26" />
573
+ </state>
574
+ </provider>
575
+ </entry>
576
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
577
+ <provider selected="true" editor-type-id="text-editor">
578
+ <state relative-caret-position="374">
579
+ <caret line="278" column="75" lean-forward="true" selection-start-line="278" selection-start-column="75" selection-end-line="278" selection-end-column="75" />
580
+ </state>
581
+ </provider>
582
+ </entry>
583
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
584
+ <provider selected="true" editor-type-id="text-editor">
585
+ <state relative-caret-position="414">
586
+ <caret line="23" column="46" selection-start-line="23" selection-start-column="46" selection-end-line="23" selection-end-column="46" />
594
587
  </state>
595
588
  </provider>
596
589
  </entry>
@@ -34,7 +34,7 @@ module HttpCrawler
34
34
  def initialize(parameter = {})
35
35
  parameter = parameter.symbolize_keys
36
36
 
37
- parameter[:uri_or_path] = parameter[:url]||parameter[:uri]
37
+ parameter[:uri_or_path] = parameter[:url] || parameter[:uri]
38
38
 
39
39
  if parameter[:uri_or_path]
40
40
  # 如果自定义uri
@@ -55,7 +55,7 @@ module HttpCrawler
55
55
  init_client
56
56
 
57
57
  # 初始化 代理参数
58
- @proxy_params = {key: "#{self.class.to_s.gsub(":","_")}"}
58
+ @proxy_params = {key: "#{self.class.to_s.gsub(":", "_")}"}
59
59
  end
60
60
 
61
61
  attr_accessor :max_error_num
@@ -270,9 +270,16 @@ module HttpCrawler
270
270
 
271
271
 
272
272
  # 发送 get 请求
273
- def get(path, params = {})
273
+ def get(path, params = {}, limit = 3)
274
274
  raise "Client uri为空" unless self.uri
275
- request {http.get((self.uri + path).to_s, :params => params, :ssl_context => @ctx)}
275
+ request do
276
+ r = http.get((self.uri + path).to_s, :params => params, :ssl_context => @ctx)
277
+ return r if limit < 0
278
+ r.html.at_xpath("//meta[@http-equiv='Refresh']").jagger_blank do |objc|
279
+ r = self.get(objc.to_html[/(?:URL|url)="?(.*)[^";>]/, 1], params, limit - 1)
280
+ end
281
+ r
282
+ end
276
283
  end
277
284
 
278
285
  # 直接发送uri的get请求
@@ -3,7 +3,20 @@ class String
3
3
  # 清除包含: 空格,回车
4
4
  #
5
5
  def jagger_del_inter
6
- self.gsub(/(?:\n|\t|\r| | )/, "")
6
+ self.gsub(/(?:\n|\t|\r| | | |)/, "")
7
+ end
8
+
9
+ # 创建时间: 2019/5/6 18:11
10
+ # 更新时间: 2019/5/6
11
+ # 作者: Jagger
12
+ # 方法名称: jagger_to_array
13
+ # 方法说明: 字符串分割成数组
14
+ # 调用方式: #jagger_to_array
15
+ #
16
+ # @return Array
17
+ #
18
+ def jagger_to_array
19
+ self.split(/(?:\n|\t|\r| | | )+/)
7
20
  end
8
21
 
9
22
  # 转换成时间格式
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.22"
2
+ VERSION = "0.3.1.23"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.22
4
+ version: 0.3.1.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-05-04 00:00:00.000000000 Z
11
+ date: 2019-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec