http_crawler 0.2.2.4 → 0.2.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.idea/workspace.xml +90 -86
- data/lib/http_crawler.rb +1 -1
- data/lib/http_crawler/http.rb +3 -2
- data/lib/http_crawler/proxy.rb +1 -1
- data/lib/http_crawler/version.rb +1 -1
- data/lib/http_crawler/web.rb +1 -2
- data/lib/http_crawler/web/baidu/client.rb +4 -1
- data/lib/http_crawler/web/client.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b5b2ca28b39f3ca7f425deffc3a619aa8542026a
|
|
4
|
+
data.tar.gz: fca1f293888563010d0988f8916c1def82e238c6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8b77a7227e718be104e664f58a55ee33e794a57a125aa63d42f5fd0d8ad732dc01018c229e1b75ca54d9f39785eb00c66b93b4ea6be244ddd7ed29b2818094c2
|
|
7
|
+
data.tar.gz: 823cae89613f38f0d0ce17304326c850f1403a3fbcf084de697c79a37b341f5e3d1cbdda0757345176094a662c44fa1a81baa75dcd27790a520e80017ecd02d4
|
data/.idea/workspace.xml
CHANGED
|
@@ -24,92 +24,92 @@
|
|
|
24
24
|
</component>
|
|
25
25
|
<component name="FileEditorManager">
|
|
26
26
|
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
|
27
|
-
<file leaf-file-name="version.rb" pinned="false" current-in-tab="
|
|
27
|
+
<file leaf-file-name="version.rb" pinned="false" current-in-tab="false">
|
|
28
28
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
|
29
29
|
<provider selected="true" editor-type-id="text-editor">
|
|
30
|
-
<state relative-caret-position="
|
|
31
|
-
<caret line="
|
|
30
|
+
<state relative-caret-position="15">
|
|
31
|
+
<caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
|
|
32
32
|
</state>
|
|
33
33
|
</provider>
|
|
34
34
|
</entry>
|
|
35
35
|
</file>
|
|
36
|
-
<file leaf-file-name="
|
|
37
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
36
|
+
<file leaf-file-name="http.rb" pinned="false" current-in-tab="true">
|
|
37
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
|
38
38
|
<provider selected="true" editor-type-id="text-editor">
|
|
39
|
-
<state>
|
|
40
|
-
<caret column="
|
|
39
|
+
<state relative-caret-position="-457">
|
|
40
|
+
<caret line="5" column="44" selection-start-line="5" selection-start-column="44" selection-end-line="5" selection-end-column="44" />
|
|
41
41
|
</state>
|
|
42
42
|
</provider>
|
|
43
43
|
</entry>
|
|
44
44
|
</file>
|
|
45
|
-
<file leaf-file-name="
|
|
46
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
45
|
+
<file leaf-file-name="response.rb" pinned="false" current-in-tab="false">
|
|
46
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
|
|
47
47
|
<provider selected="true" editor-type-id="text-editor">
|
|
48
|
-
<state relative-caret-position="
|
|
49
|
-
<caret line="
|
|
48
|
+
<state relative-caret-position="90">
|
|
49
|
+
<caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
|
|
50
50
|
</state>
|
|
51
51
|
</provider>
|
|
52
52
|
</entry>
|
|
53
53
|
</file>
|
|
54
|
-
<file leaf-file-name="
|
|
55
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
54
|
+
<file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
|
|
55
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
|
|
56
56
|
<provider selected="true" editor-type-id="text-editor">
|
|
57
|
-
<state relative-caret-position="
|
|
58
|
-
<caret line="
|
|
57
|
+
<state relative-caret-position="30">
|
|
58
|
+
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
|
59
59
|
</state>
|
|
60
60
|
</provider>
|
|
61
61
|
</entry>
|
|
62
62
|
</file>
|
|
63
|
-
<file leaf-file-name="
|
|
64
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web.rb">
|
|
63
|
+
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
|
64
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
|
65
65
|
<provider selected="true" editor-type-id="text-editor">
|
|
66
|
-
<state relative-caret-position="
|
|
67
|
-
<caret line="
|
|
66
|
+
<state relative-caret-position="75">
|
|
67
|
+
<caret line="5" column="46" selection-start-line="5" selection-start-column="46" selection-end-line="5" selection-end-column="46" />
|
|
68
68
|
</state>
|
|
69
69
|
</provider>
|
|
70
70
|
</entry>
|
|
71
71
|
</file>
|
|
72
|
-
<file leaf-file-name="
|
|
73
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
|
72
|
+
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
|
73
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
|
|
74
74
|
<provider selected="true" editor-type-id="text-editor">
|
|
75
|
-
<state relative-caret-position="
|
|
76
|
-
<caret line="
|
|
75
|
+
<state relative-caret-position="75">
|
|
76
|
+
<caret line="5" column="18" lean-forward="true" selection-start-line="5" selection-start-column="18" selection-end-line="5" selection-end-column="18" />
|
|
77
77
|
</state>
|
|
78
78
|
</provider>
|
|
79
79
|
</entry>
|
|
80
80
|
</file>
|
|
81
|
-
<file leaf-file-name="
|
|
82
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
81
|
+
<file leaf-file-name="web.rb" pinned="false" current-in-tab="false">
|
|
82
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web.rb">
|
|
83
83
|
<provider selected="true" editor-type-id="text-editor">
|
|
84
|
-
<state relative-caret-position="
|
|
85
|
-
<caret line="
|
|
84
|
+
<state relative-caret-position="45">
|
|
85
|
+
<caret line="3" column="41" selection-start-line="3" selection-start-column="41" selection-end-line="3" selection-end-column="41" />
|
|
86
86
|
</state>
|
|
87
87
|
</provider>
|
|
88
88
|
</entry>
|
|
89
89
|
</file>
|
|
90
|
-
<file leaf-file-name="
|
|
91
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler
|
|
90
|
+
<file leaf-file-name="http_crawler.rb" pinned="false" current-in-tab="false">
|
|
91
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
|
92
92
|
<provider selected="true" editor-type-id="text-editor">
|
|
93
|
-
<state relative-caret-position="
|
|
94
|
-
<caret line="
|
|
93
|
+
<state relative-caret-position="120">
|
|
94
|
+
<caret line="8" column="22" lean-forward="true" selection-start-line="8" selection-start-column="22" selection-end-line="8" selection-end-column="22" />
|
|
95
95
|
</state>
|
|
96
96
|
</provider>
|
|
97
97
|
</entry>
|
|
98
98
|
</file>
|
|
99
99
|
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
|
100
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
100
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
|
101
101
|
<provider selected="true" editor-type-id="text-editor">
|
|
102
|
-
<state>
|
|
103
|
-
<caret column="
|
|
102
|
+
<state relative-caret-position="90">
|
|
103
|
+
<caret line="6" column="36" selection-start-line="6" selection-start-column="36" selection-end-line="6" selection-end-column="36" />
|
|
104
104
|
</state>
|
|
105
105
|
</provider>
|
|
106
106
|
</entry>
|
|
107
107
|
</file>
|
|
108
|
-
<file leaf-file-name="
|
|
109
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
108
|
+
<file leaf-file-name="proxy.rb" pinned="false" current-in-tab="false">
|
|
109
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
|
|
110
110
|
<provider selected="true" editor-type-id="text-editor">
|
|
111
|
-
<state relative-caret-position="
|
|
112
|
-
<caret line="
|
|
111
|
+
<state relative-caret-position="75">
|
|
112
|
+
<caret line="5" column="40" selection-start-line="5" selection-start-column="40" selection-end-line="5" selection-end-column="40" />
|
|
113
113
|
</state>
|
|
114
114
|
</provider>
|
|
115
115
|
</entry>
|
|
@@ -150,16 +150,16 @@
|
|
|
150
150
|
<option value="$PROJECT_DIR$/lib/http_crawler/test1.rb" />
|
|
151
151
|
<option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
|
|
152
152
|
<option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
|
|
153
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/http.rb" />
|
|
154
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/web/client.rb" />
|
|
155
153
|
<option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
|
|
156
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/web.rb" />
|
|
157
154
|
<option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
|
|
158
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/proxy.rb" />
|
|
159
|
-
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb" />
|
|
160
155
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
|
|
161
156
|
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
|
|
157
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy.rb" />
|
|
158
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web.rb" />
|
|
159
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/http.rb" />
|
|
162
160
|
<option value="$PROJECT_DIR$/lib/http_crawler.rb" />
|
|
161
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/client.rb" />
|
|
162
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb" />
|
|
163
163
|
<option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
|
|
164
164
|
</list>
|
|
165
165
|
</option>
|
|
@@ -248,12 +248,12 @@
|
|
|
248
248
|
<option name="presentableId" value="Default" />
|
|
249
249
|
<updated>1545966039594</updated>
|
|
250
250
|
<workItem from="1545966041001" duration="9181000" />
|
|
251
|
-
<workItem from="1546164127129" duration="
|
|
251
|
+
<workItem from="1546164127129" duration="8638000" />
|
|
252
252
|
</task>
|
|
253
253
|
<servers />
|
|
254
254
|
</component>
|
|
255
255
|
<component name="TimeTrackingManager">
|
|
256
|
-
<option name="totallyTimeSpent" value="
|
|
256
|
+
<option name="totallyTimeSpent" value="17819000" />
|
|
257
257
|
</component>
|
|
258
258
|
<component name="ToolWindowManager">
|
|
259
259
|
<frame x="0" y="0" width="1680" height="1050" extended-state="6" />
|
|
@@ -387,16 +387,6 @@
|
|
|
387
387
|
</state>
|
|
388
388
|
</provider>
|
|
389
389
|
</entry>
|
|
390
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
|
|
391
|
-
<provider selected="true" editor-type-id="text-editor" />
|
|
392
|
-
</entry>
|
|
393
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
|
|
394
|
-
<provider selected="true" editor-type-id="text-editor">
|
|
395
|
-
<state relative-caret-position="15">
|
|
396
|
-
<caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
|
|
397
|
-
</state>
|
|
398
|
-
</provider>
|
|
399
|
-
</entry>
|
|
400
390
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb">
|
|
401
391
|
<provider selected="true" editor-type-id="text-editor">
|
|
402
392
|
<state relative-caret-position="120">
|
|
@@ -480,87 +470,101 @@
|
|
|
480
470
|
</state>
|
|
481
471
|
</provider>
|
|
482
472
|
</entry>
|
|
483
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
473
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb">
|
|
484
474
|
<provider selected="true" editor-type-id="text-editor">
|
|
485
|
-
<state relative-caret-position="
|
|
486
|
-
<caret line="
|
|
475
|
+
<state relative-caret-position="90">
|
|
476
|
+
<caret line="6" column="20" lean-forward="true" selection-start-line="6" selection-start-column="20" selection-end-line="6" selection-end-column="20" />
|
|
487
477
|
</state>
|
|
488
478
|
</provider>
|
|
489
479
|
</entry>
|
|
490
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
480
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
|
|
481
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
482
|
+
<state relative-caret-position="75">
|
|
483
|
+
<caret line="5" lean-forward="true" selection-start-line="5" selection-end-line="5" />
|
|
484
|
+
</state>
|
|
485
|
+
</provider>
|
|
486
|
+
</entry>
|
|
487
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
|
491
488
|
<provider selected="true" editor-type-id="text-editor">
|
|
492
489
|
<state relative-caret-position="30">
|
|
493
490
|
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
|
494
491
|
</state>
|
|
495
492
|
</provider>
|
|
496
493
|
</entry>
|
|
497
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
|
494
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
|
|
498
495
|
<provider selected="true" editor-type-id="text-editor">
|
|
499
|
-
<state
|
|
500
|
-
<caret
|
|
496
|
+
<state>
|
|
497
|
+
<caret column="18" lean-forward="true" selection-end-column="57" />
|
|
501
498
|
</state>
|
|
502
499
|
</provider>
|
|
503
500
|
</entry>
|
|
504
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
501
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
|
|
505
502
|
<provider selected="true" editor-type-id="text-editor">
|
|
506
|
-
<state relative-caret-position="
|
|
507
|
-
<caret line="
|
|
503
|
+
<state relative-caret-position="30">
|
|
504
|
+
<caret line="2" lean-forward="true" selection-start-line="2" selection-end-line="2" />
|
|
508
505
|
</state>
|
|
509
506
|
</provider>
|
|
510
507
|
</entry>
|
|
511
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
508
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
|
|
512
509
|
<provider selected="true" editor-type-id="text-editor">
|
|
513
|
-
<state>
|
|
514
|
-
<caret column="
|
|
510
|
+
<state relative-caret-position="90">
|
|
511
|
+
<caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
|
|
515
512
|
</state>
|
|
516
513
|
</provider>
|
|
517
514
|
</entry>
|
|
518
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
515
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
|
519
516
|
<provider selected="true" editor-type-id="text-editor">
|
|
520
|
-
<state relative-caret-position="
|
|
521
|
-
<caret line="
|
|
517
|
+
<state relative-caret-position="90">
|
|
518
|
+
<caret line="6" column="36" selection-start-line="6" selection-start-column="36" selection-end-line="6" selection-end-column="36" />
|
|
522
519
|
</state>
|
|
523
520
|
</provider>
|
|
524
521
|
</entry>
|
|
525
522
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
|
|
526
523
|
<provider selected="true" editor-type-id="text-editor">
|
|
527
|
-
<state relative-caret-position="
|
|
528
|
-
<caret line="
|
|
524
|
+
<state relative-caret-position="75">
|
|
525
|
+
<caret line="5" column="40" selection-start-line="5" selection-start-column="40" selection-end-line="5" selection-end-column="40" />
|
|
529
526
|
</state>
|
|
530
527
|
</provider>
|
|
531
528
|
</entry>
|
|
532
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler
|
|
529
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
|
533
530
|
<provider selected="true" editor-type-id="text-editor">
|
|
534
|
-
<state>
|
|
535
|
-
<caret column="
|
|
531
|
+
<state relative-caret-position="120">
|
|
532
|
+
<caret line="8" column="22" lean-forward="true" selection-start-line="8" selection-start-column="22" selection-end-line="8" selection-end-column="22" />
|
|
536
533
|
</state>
|
|
537
534
|
</provider>
|
|
538
535
|
</entry>
|
|
539
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
536
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web.rb">
|
|
540
537
|
<provider selected="true" editor-type-id="text-editor">
|
|
541
|
-
<state relative-caret-position="
|
|
542
|
-
<caret line="
|
|
538
|
+
<state relative-caret-position="45">
|
|
539
|
+
<caret line="3" column="41" selection-start-line="3" selection-start-column="41" selection-end-line="3" selection-end-column="41" />
|
|
543
540
|
</state>
|
|
544
541
|
</provider>
|
|
545
542
|
</entry>
|
|
546
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler/
|
|
543
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
|
547
544
|
<provider selected="true" editor-type-id="text-editor">
|
|
548
545
|
<state relative-caret-position="75">
|
|
549
|
-
<caret line="5"
|
|
546
|
+
<caret line="5" column="46" selection-start-line="5" selection-start-column="46" selection-end-line="5" selection-end-column="46" />
|
|
550
547
|
</state>
|
|
551
548
|
</provider>
|
|
552
549
|
</entry>
|
|
553
|
-
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
|
550
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
|
|
554
551
|
<provider selected="true" editor-type-id="text-editor">
|
|
555
|
-
<state relative-caret-position="
|
|
556
|
-
<caret line="
|
|
552
|
+
<state relative-caret-position="75">
|
|
553
|
+
<caret line="5" column="18" lean-forward="true" selection-start-line="5" selection-start-column="18" selection-end-line="5" selection-end-column="18" />
|
|
557
554
|
</state>
|
|
558
555
|
</provider>
|
|
559
556
|
</entry>
|
|
560
557
|
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
|
561
558
|
<provider selected="true" editor-type-id="text-editor">
|
|
562
|
-
<state relative-caret-position="
|
|
563
|
-
<caret line="
|
|
559
|
+
<state relative-caret-position="15">
|
|
560
|
+
<caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
|
|
561
|
+
</state>
|
|
562
|
+
</provider>
|
|
563
|
+
</entry>
|
|
564
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
|
565
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
566
|
+
<state relative-caret-position="-457">
|
|
567
|
+
<caret line="5" column="44" selection-start-line="5" selection-start-column="44" selection-end-line="5" selection-end-column="44" />
|
|
564
568
|
</state>
|
|
565
569
|
</provider>
|
|
566
570
|
</entry>
|
data/lib/http_crawler.rb
CHANGED
data/lib/http_crawler/http.rb
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
load File.dirname(__FILE__) + '/net/http.rb'
|
|
2
|
-
load File.dirname(__FILE__) + '/net/response.rb'
|
|
3
1
|
|
|
4
2
|
module HttpCrawler
|
|
5
3
|
class HTTP < Net::HTTP
|
|
6
4
|
|
|
5
|
+
load File.dirname(__FILE__) + '/net/http.rb'
|
|
6
|
+
load File.dirname(__FILE__) + '/net/response.rb'
|
|
7
|
+
|
|
7
8
|
# 自动获取代理,true 表示自动获取代理 、false 表示不自动获取
|
|
8
9
|
attr_accessor :auto_proxy
|
|
9
10
|
# 代理API的别名 主要关联 HttpCrawler::Proxy中维护的代理API
|
data/lib/http_crawler/proxy.rb
CHANGED
data/lib/http_crawler/version.rb
CHANGED
data/lib/http_crawler/web.rb
CHANGED