http_crawler 0.2.2.4 → 0.2.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/workspace.xml +90 -86
- data/lib/http_crawler.rb +1 -1
- data/lib/http_crawler/http.rb +3 -2
- data/lib/http_crawler/proxy.rb +1 -1
- data/lib/http_crawler/version.rb +1 -1
- data/lib/http_crawler/web.rb +1 -2
- data/lib/http_crawler/web/baidu/client.rb +4 -1
- data/lib/http_crawler/web/client.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5b2ca28b39f3ca7f425deffc3a619aa8542026a
|
4
|
+
data.tar.gz: fca1f293888563010d0988f8916c1def82e238c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b77a7227e718be104e664f58a55ee33e794a57a125aa63d42f5fd0d8ad732dc01018c229e1b75ca54d9f39785eb00c66b93b4ea6be244ddd7ed29b2818094c2
|
7
|
+
data.tar.gz: 823cae89613f38f0d0ce17304326c850f1403a3fbcf084de697c79a37b341f5e3d1cbdda0757345176094a662c44fa1a81baa75dcd27790a520e80017ecd02d4
|
data/.idea/workspace.xml
CHANGED
@@ -24,92 +24,92 @@
|
|
24
24
|
</component>
|
25
25
|
<component name="FileEditorManager">
|
26
26
|
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
27
|
-
<file leaf-file-name="version.rb" pinned="false" current-in-tab="
|
27
|
+
<file leaf-file-name="version.rb" pinned="false" current-in-tab="false">
|
28
28
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
29
29
|
<provider selected="true" editor-type-id="text-editor">
|
30
|
-
<state relative-caret-position="
|
31
|
-
<caret line="
|
30
|
+
<state relative-caret-position="15">
|
31
|
+
<caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
|
32
32
|
</state>
|
33
33
|
</provider>
|
34
34
|
</entry>
|
35
35
|
</file>
|
36
|
-
<file leaf-file-name="
|
37
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
36
|
+
<file leaf-file-name="http.rb" pinned="false" current-in-tab="true">
|
37
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
38
38
|
<provider selected="true" editor-type-id="text-editor">
|
39
|
-
<state>
|
40
|
-
<caret column="
|
39
|
+
<state relative-caret-position="-457">
|
40
|
+
<caret line="5" column="44" selection-start-line="5" selection-start-column="44" selection-end-line="5" selection-end-column="44" />
|
41
41
|
</state>
|
42
42
|
</provider>
|
43
43
|
</entry>
|
44
44
|
</file>
|
45
|
-
<file leaf-file-name="
|
46
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
45
|
+
<file leaf-file-name="response.rb" pinned="false" current-in-tab="false">
|
46
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
|
47
47
|
<provider selected="true" editor-type-id="text-editor">
|
48
|
-
<state relative-caret-position="
|
49
|
-
<caret line="
|
48
|
+
<state relative-caret-position="90">
|
49
|
+
<caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
|
50
50
|
</state>
|
51
51
|
</provider>
|
52
52
|
</entry>
|
53
53
|
</file>
|
54
|
-
<file leaf-file-name="
|
55
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
54
|
+
<file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
|
55
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
|
56
56
|
<provider selected="true" editor-type-id="text-editor">
|
57
|
-
<state relative-caret-position="
|
58
|
-
<caret line="
|
57
|
+
<state relative-caret-position="30">
|
58
|
+
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
59
59
|
</state>
|
60
60
|
</provider>
|
61
61
|
</entry>
|
62
62
|
</file>
|
63
|
-
<file leaf-file-name="
|
64
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web.rb">
|
63
|
+
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
64
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
65
65
|
<provider selected="true" editor-type-id="text-editor">
|
66
|
-
<state relative-caret-position="
|
67
|
-
<caret line="
|
66
|
+
<state relative-caret-position="75">
|
67
|
+
<caret line="5" column="46" selection-start-line="5" selection-start-column="46" selection-end-line="5" selection-end-column="46" />
|
68
68
|
</state>
|
69
69
|
</provider>
|
70
70
|
</entry>
|
71
71
|
</file>
|
72
|
-
<file leaf-file-name="
|
73
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
72
|
+
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
73
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
|
74
74
|
<provider selected="true" editor-type-id="text-editor">
|
75
|
-
<state relative-caret-position="
|
76
|
-
<caret line="
|
75
|
+
<state relative-caret-position="75">
|
76
|
+
<caret line="5" column="18" lean-forward="true" selection-start-line="5" selection-start-column="18" selection-end-line="5" selection-end-column="18" />
|
77
77
|
</state>
|
78
78
|
</provider>
|
79
79
|
</entry>
|
80
80
|
</file>
|
81
|
-
<file leaf-file-name="
|
82
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
81
|
+
<file leaf-file-name="web.rb" pinned="false" current-in-tab="false">
|
82
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web.rb">
|
83
83
|
<provider selected="true" editor-type-id="text-editor">
|
84
|
-
<state relative-caret-position="
|
85
|
-
<caret line="
|
84
|
+
<state relative-caret-position="45">
|
85
|
+
<caret line="3" column="41" selection-start-line="3" selection-start-column="41" selection-end-line="3" selection-end-column="41" />
|
86
86
|
</state>
|
87
87
|
</provider>
|
88
88
|
</entry>
|
89
89
|
</file>
|
90
|
-
<file leaf-file-name="
|
91
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler
|
90
|
+
<file leaf-file-name="http_crawler.rb" pinned="false" current-in-tab="false">
|
91
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
92
92
|
<provider selected="true" editor-type-id="text-editor">
|
93
|
-
<state relative-caret-position="
|
94
|
-
<caret line="
|
93
|
+
<state relative-caret-position="120">
|
94
|
+
<caret line="8" column="22" lean-forward="true" selection-start-line="8" selection-start-column="22" selection-end-line="8" selection-end-column="22" />
|
95
95
|
</state>
|
96
96
|
</provider>
|
97
97
|
</entry>
|
98
98
|
</file>
|
99
99
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
100
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
100
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
101
101
|
<provider selected="true" editor-type-id="text-editor">
|
102
|
-
<state>
|
103
|
-
<caret column="
|
102
|
+
<state relative-caret-position="90">
|
103
|
+
<caret line="6" column="36" selection-start-line="6" selection-start-column="36" selection-end-line="6" selection-end-column="36" />
|
104
104
|
</state>
|
105
105
|
</provider>
|
106
106
|
</entry>
|
107
107
|
</file>
|
108
|
-
<file leaf-file-name="
|
109
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
108
|
+
<file leaf-file-name="proxy.rb" pinned="false" current-in-tab="false">
|
109
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
|
110
110
|
<provider selected="true" editor-type-id="text-editor">
|
111
|
-
<state relative-caret-position="
|
112
|
-
<caret line="
|
111
|
+
<state relative-caret-position="75">
|
112
|
+
<caret line="5" column="40" selection-start-line="5" selection-start-column="40" selection-end-line="5" selection-end-column="40" />
|
113
113
|
</state>
|
114
114
|
</provider>
|
115
115
|
</entry>
|
@@ -150,16 +150,16 @@
|
|
150
150
|
<option value="$PROJECT_DIR$/lib/http_crawler/test1.rb" />
|
151
151
|
<option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
|
152
152
|
<option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
|
153
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/http.rb" />
|
154
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/web/client.rb" />
|
155
153
|
<option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
|
156
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/web.rb" />
|
157
154
|
<option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
|
158
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/proxy.rb" />
|
159
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb" />
|
160
155
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
|
161
156
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
|
157
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy.rb" />
|
158
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web.rb" />
|
159
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/http.rb" />
|
162
160
|
<option value="$PROJECT_DIR$/lib/http_crawler.rb" />
|
161
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/client.rb" />
|
162
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb" />
|
163
163
|
<option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
|
164
164
|
</list>
|
165
165
|
</option>
|
@@ -248,12 +248,12 @@
|
|
248
248
|
<option name="presentableId" value="Default" />
|
249
249
|
<updated>1545966039594</updated>
|
250
250
|
<workItem from="1545966041001" duration="9181000" />
|
251
|
-
<workItem from="1546164127129" duration="
|
251
|
+
<workItem from="1546164127129" duration="8638000" />
|
252
252
|
</task>
|
253
253
|
<servers />
|
254
254
|
</component>
|
255
255
|
<component name="TimeTrackingManager">
|
256
|
-
<option name="totallyTimeSpent" value="
|
256
|
+
<option name="totallyTimeSpent" value="17819000" />
|
257
257
|
</component>
|
258
258
|
<component name="ToolWindowManager">
|
259
259
|
<frame x="0" y="0" width="1680" height="1050" extended-state="6" />
|
@@ -387,16 +387,6 @@
|
|
387
387
|
</state>
|
388
388
|
</provider>
|
389
389
|
</entry>
|
390
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
|
391
|
-
<provider selected="true" editor-type-id="text-editor" />
|
392
|
-
</entry>
|
393
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
|
394
|
-
<provider selected="true" editor-type-id="text-editor">
|
395
|
-
<state relative-caret-position="15">
|
396
|
-
<caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
|
397
|
-
</state>
|
398
|
-
</provider>
|
399
|
-
</entry>
|
400
390
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb">
|
401
391
|
<provider selected="true" editor-type-id="text-editor">
|
402
392
|
<state relative-caret-position="120">
|
@@ -480,87 +470,101 @@
|
|
480
470
|
</state>
|
481
471
|
</provider>
|
482
472
|
</entry>
|
483
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
473
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb">
|
484
474
|
<provider selected="true" editor-type-id="text-editor">
|
485
|
-
<state relative-caret-position="
|
486
|
-
<caret line="
|
475
|
+
<state relative-caret-position="90">
|
476
|
+
<caret line="6" column="20" lean-forward="true" selection-start-line="6" selection-start-column="20" selection-end-line="6" selection-end-column="20" />
|
487
477
|
</state>
|
488
478
|
</provider>
|
489
479
|
</entry>
|
490
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
480
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
|
481
|
+
<provider selected="true" editor-type-id="text-editor">
|
482
|
+
<state relative-caret-position="75">
|
483
|
+
<caret line="5" lean-forward="true" selection-start-line="5" selection-end-line="5" />
|
484
|
+
</state>
|
485
|
+
</provider>
|
486
|
+
</entry>
|
487
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
491
488
|
<provider selected="true" editor-type-id="text-editor">
|
492
489
|
<state relative-caret-position="30">
|
493
490
|
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
494
491
|
</state>
|
495
492
|
</provider>
|
496
493
|
</entry>
|
497
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
494
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
|
498
495
|
<provider selected="true" editor-type-id="text-editor">
|
499
|
-
<state
|
500
|
-
<caret
|
496
|
+
<state>
|
497
|
+
<caret column="18" lean-forward="true" selection-end-column="57" />
|
501
498
|
</state>
|
502
499
|
</provider>
|
503
500
|
</entry>
|
504
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
501
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
|
505
502
|
<provider selected="true" editor-type-id="text-editor">
|
506
|
-
<state relative-caret-position="
|
507
|
-
<caret line="
|
503
|
+
<state relative-caret-position="30">
|
504
|
+
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
508
505
|
</state>
|
509
506
|
</provider>
|
510
507
|
</entry>
|
511
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
508
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
|
512
509
|
<provider selected="true" editor-type-id="text-editor">
|
513
|
-
<state>
|
514
|
-
<caret column="
|
510
|
+
<state relative-caret-position="90">
|
511
|
+
<caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
|
515
512
|
</state>
|
516
513
|
</provider>
|
517
514
|
</entry>
|
518
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
515
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
519
516
|
<provider selected="true" editor-type-id="text-editor">
|
520
|
-
<state relative-caret-position="
|
521
|
-
<caret line="
|
517
|
+
<state relative-caret-position="90">
|
518
|
+
<caret line="6" column="36" selection-start-line="6" selection-start-column="36" selection-end-line="6" selection-end-column="36" />
|
522
519
|
</state>
|
523
520
|
</provider>
|
524
521
|
</entry>
|
525
522
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
|
526
523
|
<provider selected="true" editor-type-id="text-editor">
|
527
|
-
<state relative-caret-position="
|
528
|
-
<caret line="
|
524
|
+
<state relative-caret-position="75">
|
525
|
+
<caret line="5" column="40" selection-start-line="5" selection-start-column="40" selection-end-line="5" selection-end-column="40" />
|
529
526
|
</state>
|
530
527
|
</provider>
|
531
528
|
</entry>
|
532
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler
|
529
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
533
530
|
<provider selected="true" editor-type-id="text-editor">
|
534
|
-
<state>
|
535
|
-
<caret column="
|
531
|
+
<state relative-caret-position="120">
|
532
|
+
<caret line="8" column="22" lean-forward="true" selection-start-line="8" selection-start-column="22" selection-end-line="8" selection-end-column="22" />
|
536
533
|
</state>
|
537
534
|
</provider>
|
538
535
|
</entry>
|
539
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
536
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web.rb">
|
540
537
|
<provider selected="true" editor-type-id="text-editor">
|
541
|
-
<state relative-caret-position="
|
542
|
-
<caret line="
|
538
|
+
<state relative-caret-position="45">
|
539
|
+
<caret line="3" column="41" selection-start-line="3" selection-start-column="41" selection-end-line="3" selection-end-column="41" />
|
543
540
|
</state>
|
544
541
|
</provider>
|
545
542
|
</entry>
|
546
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
543
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
547
544
|
<provider selected="true" editor-type-id="text-editor">
|
548
545
|
<state relative-caret-position="75">
|
549
|
-
<caret line="5"
|
546
|
+
<caret line="5" column="46" selection-start-line="5" selection-start-column="46" selection-end-line="5" selection-end-column="46" />
|
550
547
|
</state>
|
551
548
|
</provider>
|
552
549
|
</entry>
|
553
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
550
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
|
554
551
|
<provider selected="true" editor-type-id="text-editor">
|
555
|
-
<state relative-caret-position="
|
556
|
-
<caret line="
|
552
|
+
<state relative-caret-position="75">
|
553
|
+
<caret line="5" column="18" lean-forward="true" selection-start-line="5" selection-start-column="18" selection-end-line="5" selection-end-column="18" />
|
557
554
|
</state>
|
558
555
|
</provider>
|
559
556
|
</entry>
|
560
557
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
561
558
|
<provider selected="true" editor-type-id="text-editor">
|
562
|
-
<state relative-caret-position="
|
563
|
-
<caret line="
|
559
|
+
<state relative-caret-position="15">
|
560
|
+
<caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
|
561
|
+
</state>
|
562
|
+
</provider>
|
563
|
+
</entry>
|
564
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
565
|
+
<provider selected="true" editor-type-id="text-editor">
|
566
|
+
<state relative-caret-position="-457">
|
567
|
+
<caret line="5" column="44" selection-start-line="5" selection-start-column="44" selection-end-line="5" selection-end-column="44" />
|
564
568
|
</state>
|
565
569
|
</provider>
|
566
570
|
</entry>
|
data/lib/http_crawler.rb
CHANGED
data/lib/http_crawler/http.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
-
load File.dirname(__FILE__) + '/net/http.rb'
|
2
|
-
load File.dirname(__FILE__) + '/net/response.rb'
|
3
1
|
|
4
2
|
module HttpCrawler
|
5
3
|
class HTTP < Net::HTTP
|
6
4
|
|
5
|
+
load File.dirname(__FILE__) + '/net/http.rb'
|
6
|
+
load File.dirname(__FILE__) + '/net/response.rb'
|
7
|
+
|
7
8
|
# 自动获取代理,true 表示自动获取代理 、false 表示不自动获取
|
8
9
|
attr_accessor :auto_proxy
|
9
10
|
# 代理API的别名 主要关联 HttpCrawler::Proxy中维护的代理API
|
data/lib/http_crawler/proxy.rb
CHANGED
data/lib/http_crawler/version.rb
CHANGED
data/lib/http_crawler/web.rb
CHANGED