http_crawler 0.2.2.1 → 0.2.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/.rakeTasks +7 -0
- data/.idea/http_crawler.iml +13 -0
- data/.idea/misc.xml +7 -0
- data/.idea/modules.xml +8 -0
- data/.idea/workspace.xml +533 -0
- data/lib/http_crawler/client.rb +1 -1
- data/lib/http_crawler/common.rb +3 -3
- data/lib/http_crawler/http.rb +23 -3
- data/lib/http_crawler/proxy.rb +1 -1
- data/lib/http_crawler/version.rb +1 -1
- data/lib/http_crawler.rb +3 -3
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32a91911a47a0b39ca19d9655a8a364dc3416bd1
|
4
|
+
data.tar.gz: b62145f8d063c9825c5d93f1ec647f16995b8cad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e06a1f44aa20c7be935d0b5e0c56ed58c3f5c48bd85df5d2ca193f3174dd827a1df2f0b23d2372969cf182a92b7a6312d0108da1dfc11a880edf7768655fa23
|
7
|
+
data.tar.gz: 430f009ad66fd802ddf3cee8e619c2cd1b8047b333af1658b39806af18f23af572b861166d6030a80e4e6743944813b2a2317b6a9b653a9537e24b4d59cf613b
|
data/.idea/.rakeTasks
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<Settings><!--This file was automatically generated by Ruby plugin.
|
3
|
+
You are allowed to:
|
4
|
+
1. Remove rake task
|
5
|
+
2. Add existing rake tasks
|
6
|
+
To add existing rake tasks automatically delete this file and reload the project.
|
7
|
+
--><RakeGroup description="" fullCmd="" taksId="rake"><RakeTask description="Build http_crawler-0.1.6.gem into the pkg directory" fullCmd="build" taksId="build" /><RakeTask description="Remove any temporary products" fullCmd="clean" taksId="clean" /><RakeTask description="Remove any generated files" fullCmd="clobber" taksId="clobber" /><RakeTask description="Build and install http_crawler-0.1.6.gem into system gems" fullCmd="install" taksId="install" /><RakeGroup description="" fullCmd="" taksId="install"><RakeTask description="Build and install http_crawler-0.1.6.gem into system gems without network access" fullCmd="install:local" taksId="local" /></RakeGroup><RakeTask description="Create tag v0.1.6 and build and push http_crawler-0.1.6.gem to Rubygems" fullCmd="release[remote]" taksId="release[remote]" /><RakeTask description="" fullCmd="default" taksId="default" /><RakeTask description="" fullCmd="release" taksId="release" /><RakeGroup description="" fullCmd="" taksId="release"><RakeTask description="" fullCmd="release:guard_clean" taksId="guard_clean" /><RakeTask description="" fullCmd="release:rubygem_push" taksId="rubygem_push" /><RakeTask description="" fullCmd="release:source_control_push" taksId="source_control_push" /></RakeGroup></RakeGroup></Settings>
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="ModuleRunConfigurationManager">
|
4
|
+
<shared />
|
5
|
+
</component>
|
6
|
+
<component name="NewModuleRootManager">
|
7
|
+
<content url="file://$MODULE_DIR$" />
|
8
|
+
<orderEntry type="inheritedJdk" />
|
9
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
10
|
+
<orderEntry type="library" scope="PROVIDED" name="bundler (v1.15.1, RVM: ruby-2.4.1) [gem]" level="application" />
|
11
|
+
<orderEntry type="library" scope="PROVIDED" name="nokogiri (v1.8.0, RVM: ruby-2.4.1) [gem]" level="application" />
|
12
|
+
</component>
|
13
|
+
</module>
|
data/.idea/misc.xml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="JavaScriptSettings">
|
4
|
+
<option name="languageLevel" value="ES6" />
|
5
|
+
</component>
|
6
|
+
<component name="ProjectRootManager" version="2" project-jdk-name="RVM: ruby-2.4.1" project-jdk-type="RUBY_SDK" />
|
7
|
+
</project>
|
data/.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="ProjectModuleManager">
|
4
|
+
<modules>
|
5
|
+
<module fileurl="file://$PROJECT_DIR$/.idea/http_crawler.iml" filepath="$PROJECT_DIR$/.idea/http_crawler.iml" />
|
6
|
+
</modules>
|
7
|
+
</component>
|
8
|
+
</project>
|
data/.idea/workspace.xml
ADDED
@@ -0,0 +1,533 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="ChangeListManager">
|
4
|
+
<list default="true" id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="">
|
5
|
+
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
6
|
+
<change beforePath="$PROJECT_DIR$/lib/http_crawler/http.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/http.rb" afterDir="false" />
|
7
|
+
<change beforePath="$PROJECT_DIR$/lib/http_crawler/version.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/version.rb" afterDir="false" />
|
8
|
+
</list>
|
9
|
+
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
10
|
+
<option name="TRACKING_ENABLED" value="true" />
|
11
|
+
<option name="SHOW_DIALOG" value="false" />
|
12
|
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
13
|
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
14
|
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
15
|
+
</component>
|
16
|
+
<component name="FileEditorManager">
|
17
|
+
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
18
|
+
<file leaf-file-name="version.rb" pinned="false" current-in-tab="true">
|
19
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
20
|
+
<provider selected="true" editor-type-id="text-editor">
|
21
|
+
<state relative-caret-position="15">
|
22
|
+
<caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
|
23
|
+
</state>
|
24
|
+
</provider>
|
25
|
+
</entry>
|
26
|
+
</file>
|
27
|
+
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
28
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
|
29
|
+
<provider selected="true" editor-type-id="text-editor">
|
30
|
+
<state relative-caret-position="30">
|
31
|
+
<caret line="2" column="17" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
|
32
|
+
</state>
|
33
|
+
</provider>
|
34
|
+
</entry>
|
35
|
+
</file>
|
36
|
+
<file leaf-file-name="http_crawler.rb" pinned="false" current-in-tab="false">
|
37
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
38
|
+
<provider selected="true" editor-type-id="text-editor">
|
39
|
+
<state relative-caret-position="120">
|
40
|
+
<caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
|
41
|
+
</state>
|
42
|
+
</provider>
|
43
|
+
</entry>
|
44
|
+
</file>
|
45
|
+
<file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
|
46
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
47
|
+
<provider selected="true" editor-type-id="text-editor">
|
48
|
+
<state relative-caret-position="278">
|
49
|
+
<caret line="165" column="41" selection-start-line="165" selection-start-column="41" selection-end-line="165" selection-end-column="41" />
|
50
|
+
</state>
|
51
|
+
</provider>
|
52
|
+
</entry>
|
53
|
+
</file>
|
54
|
+
<file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
|
55
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
56
|
+
<provider selected="true" editor-type-id="text-editor">
|
57
|
+
<state relative-caret-position="45">
|
58
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
59
|
+
</state>
|
60
|
+
</provider>
|
61
|
+
</entry>
|
62
|
+
</file>
|
63
|
+
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
64
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
65
|
+
<provider selected="true" editor-type-id="text-editor">
|
66
|
+
<state relative-caret-position="-99">
|
67
|
+
<caret line="26" column="4" selection-start-line="26" selection-start-column="4" selection-end-line="26" selection-end-column="4" />
|
68
|
+
</state>
|
69
|
+
</provider>
|
70
|
+
</entry>
|
71
|
+
</file>
|
72
|
+
<file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
|
73
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
74
|
+
<provider selected="true" editor-type-id="text-editor">
|
75
|
+
<state relative-caret-position="90">
|
76
|
+
<caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
|
77
|
+
</state>
|
78
|
+
</provider>
|
79
|
+
</entry>
|
80
|
+
</file>
|
81
|
+
<file leaf-file-name="proxy.rb" pinned="false" current-in-tab="false">
|
82
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
|
83
|
+
<provider selected="true" editor-type-id="text-editor">
|
84
|
+
<state relative-caret-position="30">
|
85
|
+
<caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
|
86
|
+
</state>
|
87
|
+
</provider>
|
88
|
+
</entry>
|
89
|
+
</file>
|
90
|
+
</leaf>
|
91
|
+
</component>
|
92
|
+
<component name="FindInProjectRecents">
|
93
|
+
<findStrings>
|
94
|
+
<find>Crawler::Web</find>
|
95
|
+
<find>"Crawler</find>
|
96
|
+
</findStrings>
|
97
|
+
<replaceStrings>
|
98
|
+
<replace>HttpCrawler::Web</replace>
|
99
|
+
<replace>HttpCrawler</replace>
|
100
|
+
</replaceStrings>
|
101
|
+
<dirStrings>
|
102
|
+
<dir>$PROJECT_DIR$</dir>
|
103
|
+
</dirStrings>
|
104
|
+
</component>
|
105
|
+
<component name="Git.Settings">
|
106
|
+
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
107
|
+
</component>
|
108
|
+
<component name="IdeDocumentHistory">
|
109
|
+
<option name="CHANGED_PATHS">
|
110
|
+
<list>
|
111
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/laofu/client.rb" />
|
112
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/README.md" />
|
113
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
|
114
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb" />
|
115
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb" />
|
116
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
|
117
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/response.rb" />
|
118
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb" />
|
119
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response.rb" />
|
120
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web.rb" />
|
121
|
+
<option value="$PROJECT_DIR$/README.md" />
|
122
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb" />
|
123
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/README.md" />
|
124
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/README.md" />
|
125
|
+
<option value="$PROJECT_DIR$/http_crawler.gemspec" />
|
126
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/web/client.rb" />
|
127
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/proxy.rb" />
|
128
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
|
129
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
|
130
|
+
<option value="$PROJECT_DIR$/lib/test.rb" />
|
131
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/test1.rb" />
|
132
|
+
<option value="$PROJECT_DIR$/lib/http_crawler.rb" />
|
133
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
|
134
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
|
135
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/http.rb" />
|
136
|
+
<option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
|
137
|
+
</list>
|
138
|
+
</option>
|
139
|
+
</component>
|
140
|
+
<component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
|
141
|
+
<component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
|
142
|
+
<component name="JsGulpfileManager">
|
143
|
+
<detection-done>true</detection-done>
|
144
|
+
<sorting>DEFINITION_ORDER</sorting>
|
145
|
+
</component>
|
146
|
+
<component name="NodePackageJsonFileManager">
|
147
|
+
<packageJsonPaths />
|
148
|
+
</component>
|
149
|
+
<component name="ProjectFrameBounds" extendedState="6" fullScreen="true">
|
150
|
+
<option name="y" value="23" />
|
151
|
+
<option name="width" value="1680" />
|
152
|
+
<option name="height" value="1027" />
|
153
|
+
</component>
|
154
|
+
<component name="ProjectView">
|
155
|
+
<navigator proportions="" version="1">
|
156
|
+
<foldersAlwaysOnTop value="true" />
|
157
|
+
</navigator>
|
158
|
+
<panes>
|
159
|
+
<pane id="ProjectPane">
|
160
|
+
<subPane>
|
161
|
+
<expand>
|
162
|
+
<path>
|
163
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
164
|
+
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
165
|
+
</path>
|
166
|
+
<path>
|
167
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
168
|
+
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
169
|
+
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
170
|
+
</path>
|
171
|
+
<path>
|
172
|
+
<item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
|
173
|
+
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
174
|
+
<item name="lib" type="462c0819:PsiDirectoryNode" />
|
175
|
+
<item name="http_crawler" type="462c0819:PsiDirectoryNode" />
|
176
|
+
</path>
|
177
|
+
</expand>
|
178
|
+
<select />
|
179
|
+
</subPane>
|
180
|
+
</pane>
|
181
|
+
<pane id="Scope" />
|
182
|
+
</panes>
|
183
|
+
</component>
|
184
|
+
<component name="PropertiesComponent">
|
185
|
+
<property name="WebServerToolWindowFactoryState" value="false" />
|
186
|
+
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
187
|
+
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
|
188
|
+
<property name="nodejs_npm_path_reset_for_default_project" value="true" />
|
189
|
+
</component>
|
190
|
+
<component name="RecentsManager">
|
191
|
+
<key name="MoveFile.RECENT_KEYS">
|
192
|
+
<recent name="$PROJECT_DIR$/lib/http_crawler/common" />
|
193
|
+
</key>
|
194
|
+
<key name="CopyFile.RECENT_KEYS">
|
195
|
+
<recent name="$PROJECT_DIR$/lib/http_crawler" />
|
196
|
+
<recent name="$PROJECT_DIR$/lib/http_crawler/web" />
|
197
|
+
</key>
|
198
|
+
</component>
|
199
|
+
<component name="RunDashboard">
|
200
|
+
<option name="ruleStates">
|
201
|
+
<list>
|
202
|
+
<RuleState>
|
203
|
+
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
|
204
|
+
</RuleState>
|
205
|
+
<RuleState>
|
206
|
+
<option name="name" value="StatusDashboardGroupingRule" />
|
207
|
+
</RuleState>
|
208
|
+
</list>
|
209
|
+
</option>
|
210
|
+
</component>
|
211
|
+
<component name="SpringUtil" SPRING_PRE_LOADER_OPTION="true" />
|
212
|
+
<component name="SvnConfiguration">
|
213
|
+
<configuration />
|
214
|
+
</component>
|
215
|
+
<component name="TaskManager">
|
216
|
+
<task active="true" id="Default" summary="Default task">
|
217
|
+
<changelist id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="" />
|
218
|
+
<created>1545966039594</created>
|
219
|
+
<option name="number" value="Default" />
|
220
|
+
<option name="presentableId" value="Default" />
|
221
|
+
<updated>1545966039594</updated>
|
222
|
+
<workItem from="1545966041001" duration="9181000" />
|
223
|
+
<workItem from="1546164127129" duration="7006000" />
|
224
|
+
</task>
|
225
|
+
<servers />
|
226
|
+
</component>
|
227
|
+
<component name="TimeTrackingManager">
|
228
|
+
<option name="totallyTimeSpent" value="16187000" />
|
229
|
+
</component>
|
230
|
+
<component name="ToolWindowManager">
|
231
|
+
<frame x="0" y="0" width="1680" height="1050" extended-state="6" />
|
232
|
+
<editor active="true" />
|
233
|
+
<layout>
|
234
|
+
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.24603175" />
|
235
|
+
<window_info anchor="bottom" id="TODO" order="6" />
|
236
|
+
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
|
237
|
+
<window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
|
238
|
+
<window_info anchor="right" id="Database" order="3" />
|
239
|
+
<window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
|
240
|
+
<window_info anchor="bottom" id="Version Control" order="7" />
|
241
|
+
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
242
|
+
<window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.32902583" />
|
243
|
+
<window_info id="Favorites" order="2" side_tool="true" />
|
244
|
+
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
245
|
+
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
246
|
+
<window_info anchor="right" id="Commander" order="0" weight="0.4" />
|
247
|
+
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
248
|
+
<window_info anchor="bottom" id="Run" order="2" />
|
249
|
+
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
250
|
+
<window_info anchor="bottom" id="Message" order="0" />
|
251
|
+
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
|
252
|
+
<window_info anchor="bottom" id="Find" order="1" />
|
253
|
+
</layout>
|
254
|
+
</component>
|
255
|
+
<component name="TypeScriptGeneratedFilesManager">
|
256
|
+
<option name="version" value="1" />
|
257
|
+
</component>
|
258
|
+
<component name="VcsContentAnnotationSettings">
|
259
|
+
<option name="myLimit" value="2678400000" />
|
260
|
+
</component>
|
261
|
+
<component name="editorHistoryManager">
|
262
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
263
|
+
<provider selected="true" editor-type-id="text-editor">
|
264
|
+
<state relative-caret-position="45">
|
265
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
266
|
+
</state>
|
267
|
+
</provider>
|
268
|
+
</entry>
|
269
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
|
270
|
+
<provider selected="true" editor-type-id="text-editor">
|
271
|
+
<state relative-caret-position="30">
|
272
|
+
<caret line="2" column="17" lean-forward="true" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
|
273
|
+
</state>
|
274
|
+
</provider>
|
275
|
+
</entry>
|
276
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
277
|
+
<provider selected="true" editor-type-id="text-editor">
|
278
|
+
<state relative-caret-position="120">
|
279
|
+
<caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
|
280
|
+
</state>
|
281
|
+
</provider>
|
282
|
+
</entry>
|
283
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
284
|
+
<provider selected="true" editor-type-id="text-editor">
|
285
|
+
<state>
|
286
|
+
<caret column="4" selection-start-column="4" selection-end-column="4" />
|
287
|
+
</state>
|
288
|
+
</provider>
|
289
|
+
</entry>
|
290
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
291
|
+
<provider selected="true" editor-type-id="text-editor">
|
292
|
+
<state relative-caret-position="45">
|
293
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
294
|
+
</state>
|
295
|
+
</provider>
|
296
|
+
</entry>
|
297
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
298
|
+
<provider selected="true" editor-type-id="text-editor">
|
299
|
+
<state relative-caret-position="375">
|
300
|
+
<caret line="25" lean-forward="true" selection-start-line="25" selection-end-line="25" />
|
301
|
+
</state>
|
302
|
+
</provider>
|
303
|
+
</entry>
|
304
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
305
|
+
<provider selected="true" editor-type-id="text-editor">
|
306
|
+
<state relative-caret-position="90">
|
307
|
+
<caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
|
308
|
+
</state>
|
309
|
+
</provider>
|
310
|
+
</entry>
|
311
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
|
312
|
+
<provider selected="true" editor-type-id="text-editor">
|
313
|
+
<state relative-caret-position="30">
|
314
|
+
<caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
|
315
|
+
</state>
|
316
|
+
</provider>
|
317
|
+
</entry>
|
318
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/drive.rb" />
|
319
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/README.md">
|
320
|
+
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
321
|
+
<state split_layout="SPLIT">
|
322
|
+
<first_editor relative-caret-position="45">
|
323
|
+
<caret line="3" column="11" selection-start-line="3" selection-start-column="11" selection-end-line="3" selection-end-column="11" />
|
324
|
+
</first_editor>
|
325
|
+
<second_editor />
|
326
|
+
</state>
|
327
|
+
</provider>
|
328
|
+
</entry>
|
329
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/response.rb">
|
330
|
+
<provider selected="true" editor-type-id="text-editor">
|
331
|
+
<state relative-caret-position="30">
|
332
|
+
<caret line="2" column="18" selection-start-line="2" selection-start-column="18" selection-end-line="2" selection-end-column="18" />
|
333
|
+
</state>
|
334
|
+
</provider>
|
335
|
+
</entry>
|
336
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/web.rb">
|
337
|
+
<provider selected="true" editor-type-id="text-editor">
|
338
|
+
<state relative-caret-position="30">
|
339
|
+
<caret line="2" column="18" selection-start-line="2" selection-start-column="18" selection-end-line="2" selection-end-column="18" />
|
340
|
+
</state>
|
341
|
+
</provider>
|
342
|
+
</entry>
|
343
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
|
344
|
+
<provider selected="true" editor-type-id="text-editor" />
|
345
|
+
</entry>
|
346
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
|
347
|
+
<provider selected="true" editor-type-id="text-editor" />
|
348
|
+
</entry>
|
349
|
+
<entry file="file://$PROJECT_DIR$/README.md">
|
350
|
+
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
351
|
+
<state split_layout="SPLIT">
|
352
|
+
<first_editor relative-caret-position="255">
|
353
|
+
<caret line="17" lean-forward="true" selection-start-line="17" selection-end-line="17" />
|
354
|
+
</first_editor>
|
355
|
+
<second_editor />
|
356
|
+
</state>
|
357
|
+
</provider>
|
358
|
+
</entry>
|
359
|
+
<entry file="file://$PROJECT_DIR$/CODE_OF_CONDUCT.md">
|
360
|
+
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
361
|
+
<state split_layout="SPLIT">
|
362
|
+
<first_editor relative-caret-position="1110">
|
363
|
+
<caret line="74" lean-forward="true" selection-start-line="74" selection-end-line="74" />
|
364
|
+
</first_editor>
|
365
|
+
<second_editor />
|
366
|
+
</state>
|
367
|
+
</provider>
|
368
|
+
</entry>
|
369
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
|
370
|
+
<provider selected="true" editor-type-id="text-editor" />
|
371
|
+
</entry>
|
372
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
|
373
|
+
<provider selected="true" editor-type-id="text-editor">
|
374
|
+
<state relative-caret-position="15">
|
375
|
+
<caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
|
376
|
+
</state>
|
377
|
+
</provider>
|
378
|
+
</entry>
|
379
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb">
|
380
|
+
<provider selected="true" editor-type-id="text-editor">
|
381
|
+
<state relative-caret-position="15">
|
382
|
+
<caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
|
383
|
+
</state>
|
384
|
+
</provider>
|
385
|
+
</entry>
|
386
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb">
|
387
|
+
<provider selected="true" editor-type-id="text-editor">
|
388
|
+
<state relative-caret-position="120">
|
389
|
+
<caret line="8" column="7" lean-forward="true" selection-start-line="8" selection-start-column="7" selection-end-line="8" selection-end-column="7" />
|
390
|
+
</state>
|
391
|
+
</provider>
|
392
|
+
</entry>
|
393
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb">
|
394
|
+
<provider selected="true" editor-type-id="text-editor">
|
395
|
+
<state relative-caret-position="15">
|
396
|
+
<caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
|
397
|
+
</state>
|
398
|
+
</provider>
|
399
|
+
</entry>
|
400
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/README.md">
|
401
|
+
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
402
|
+
<state split_layout="SPLIT">
|
403
|
+
<first_editor relative-caret-position="240">
|
404
|
+
<caret line="16" lean-forward="true" selection-start-line="16" selection-end-line="18" selection-end-column="15" />
|
405
|
+
</first_editor>
|
406
|
+
<second_editor />
|
407
|
+
</state>
|
408
|
+
</provider>
|
409
|
+
</entry>
|
410
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/README.md">
|
411
|
+
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
412
|
+
<state split_layout="SPLIT">
|
413
|
+
<first_editor relative-caret-position="45">
|
414
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
415
|
+
</first_editor>
|
416
|
+
<second_editor />
|
417
|
+
</state>
|
418
|
+
</provider>
|
419
|
+
</entry>
|
420
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/README.md">
|
421
|
+
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
422
|
+
<state split_layout="SPLIT">
|
423
|
+
<first_editor relative-caret-position="60">
|
424
|
+
<caret line="4" lean-forward="true" selection-start-line="4" selection-end-line="4" />
|
425
|
+
</first_editor>
|
426
|
+
<second_editor />
|
427
|
+
</state>
|
428
|
+
</provider>
|
429
|
+
</entry>
|
430
|
+
<entry file="file://$PROJECT_DIR$/http_crawler.gemspec">
|
431
|
+
<provider selected="true" editor-type-id="text-editor">
|
432
|
+
<state relative-caret-position="330">
|
433
|
+
<caret line="22" column="26" lean-forward="true" selection-start-line="22" selection-start-column="26" selection-end-line="22" selection-end-column="26" />
|
434
|
+
</state>
|
435
|
+
</provider>
|
436
|
+
</entry>
|
437
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/response.rb">
|
438
|
+
<provider selected="true" editor-type-id="text-editor">
|
439
|
+
<state relative-caret-position="15">
|
440
|
+
<caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
|
441
|
+
</state>
|
442
|
+
</provider>
|
443
|
+
</entry>
|
444
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
|
445
|
+
<provider selected="true" editor-type-id="text-editor">
|
446
|
+
<state relative-caret-position="75">
|
447
|
+
<caret line="5" lean-forward="true" selection-start-line="5" selection-end-line="5" />
|
448
|
+
</state>
|
449
|
+
</provider>
|
450
|
+
</entry>
|
451
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
|
452
|
+
<provider selected="true" editor-type-id="text-editor">
|
453
|
+
<state>
|
454
|
+
<caret column="18" selection-start-column="7" selection-end-column="18" />
|
455
|
+
</state>
|
456
|
+
</provider>
|
457
|
+
</entry>
|
458
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
|
459
|
+
<provider selected="true" editor-type-id="text-editor">
|
460
|
+
<state relative-caret-position="90">
|
461
|
+
<caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
|
462
|
+
</state>
|
463
|
+
</provider>
|
464
|
+
</entry>
|
465
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
|
466
|
+
<provider selected="true" editor-type-id="text-editor">
|
467
|
+
<state relative-caret-position="30">
|
468
|
+
<caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
|
469
|
+
</state>
|
470
|
+
</provider>
|
471
|
+
</entry>
|
472
|
+
<entry file="file://$PROJECT_DIR$/lib/test.rb" />
|
473
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/test1.rb" />
|
474
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/test.rb" />
|
475
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/test2.rb" />
|
476
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
|
477
|
+
<provider selected="true" editor-type-id="text-editor">
|
478
|
+
<state relative-caret-position="45">
|
479
|
+
<caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
|
480
|
+
</state>
|
481
|
+
</provider>
|
482
|
+
</entry>
|
483
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
|
484
|
+
<provider selected="true" editor-type-id="text-editor">
|
485
|
+
<state relative-caret-position="30">
|
486
|
+
<caret line="2" column="17" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
|
487
|
+
</state>
|
488
|
+
</provider>
|
489
|
+
</entry>
|
490
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
|
491
|
+
<provider selected="true" editor-type-id="text-editor">
|
492
|
+
<state relative-caret-position="120">
|
493
|
+
<caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
|
494
|
+
</state>
|
495
|
+
</provider>
|
496
|
+
</entry>
|
497
|
+
<entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/net/http/responses.rb">
|
498
|
+
<provider selected="true" editor-type-id="text-editor">
|
499
|
+
<state relative-caret-position="215">
|
500
|
+
<caret line="113" column="24" lean-forward="true" selection-start-line="113" selection-start-column="24" selection-end-line="113" selection-end-column="24" />
|
501
|
+
</state>
|
502
|
+
</provider>
|
503
|
+
</entry>
|
504
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
|
505
|
+
<provider selected="true" editor-type-id="text-editor">
|
506
|
+
<state relative-caret-position="-99">
|
507
|
+
<caret line="26" column="4" selection-start-line="26" selection-start-column="4" selection-end-line="26" selection-end-column="4" />
|
508
|
+
</state>
|
509
|
+
</provider>
|
510
|
+
</entry>
|
511
|
+
<entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/net/http.rb">
|
512
|
+
<provider selected="true" editor-type-id="text-editor">
|
513
|
+
<state relative-caret-position="15">
|
514
|
+
<caret line="1065" column="8" selection-start-line="1065" selection-start-column="8" selection-end-line="1065" selection-end-column="8" />
|
515
|
+
</state>
|
516
|
+
</provider>
|
517
|
+
</entry>
|
518
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
|
519
|
+
<provider selected="true" editor-type-id="text-editor">
|
520
|
+
<state relative-caret-position="278">
|
521
|
+
<caret line="165" column="41" selection-start-line="165" selection-start-column="41" selection-end-line="165" selection-end-column="41" />
|
522
|
+
</state>
|
523
|
+
</provider>
|
524
|
+
</entry>
|
525
|
+
<entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
|
526
|
+
<provider selected="true" editor-type-id="text-editor">
|
527
|
+
<state relative-caret-position="15">
|
528
|
+
<caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
|
529
|
+
</state>
|
530
|
+
</provider>
|
531
|
+
</entry>
|
532
|
+
</component>
|
533
|
+
</project>
|
data/lib/http_crawler/client.rb
CHANGED
data/lib/http_crawler/common.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
load File.dirname(__FILE__) + '/http.rb'
|
2
|
+
load File.dirname(__FILE__) + '/object.rb'
|
3
|
+
load File.dirname(__FILE__) + '/string.rb'
|
data/lib/http_crawler/http.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
load File.dirname(__FILE__) + '/net/http.rb'
|
2
|
+
load File.dirname(__FILE__) + '/net/response.rb'
|
3
3
|
|
4
4
|
module HttpCrawler
|
5
5
|
class HTTP < Net::HTTP
|
@@ -31,6 +31,7 @@ module HttpCrawler
|
|
31
31
|
def proxy_api
|
32
32
|
@proxy_api ||= "my"
|
33
33
|
end
|
34
|
+
|
34
35
|
@@proxy_list = []
|
35
36
|
# 为 @http 重设代理
|
36
37
|
def proxy(p = {})
|
@@ -127,6 +128,15 @@ module HttpCrawler
|
|
127
128
|
server_error_sleep
|
128
129
|
# 重新请求
|
129
130
|
get_fetch(uri_or_path, initheader, dest, &block)
|
131
|
+
when Net::HTTPProxyAuthenticationRequired then
|
132
|
+
Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{@proxy_address}:#{@proxy_port}] =>#{address}"
|
133
|
+
if update_proxy?
|
134
|
+
server_error_sleep
|
135
|
+
# 重新请求
|
136
|
+
get_fetch(uri_or_path, initheader, dest, &block)
|
137
|
+
else
|
138
|
+
response.error!
|
139
|
+
end
|
130
140
|
else
|
131
141
|
server_error_sleep
|
132
142
|
response.error!
|
@@ -137,7 +147,7 @@ module HttpCrawler
|
|
137
147
|
def post_fetch(uri_or_path, data, initheader = nil, dest = nil, &block)
|
138
148
|
# 更新uri_or_path 如果 uri_or_path 是 String类型 同时 又不是 ascii编码格式就进行转码
|
139
149
|
uri_or_path = URI.encode(uri_or_path) if String === uri_or_path && CharDet.detect(uri_or_path)["encoding"] != "ascii"
|
140
|
-
Rails.logger.debug "post_fetch => #{uri_or_path}"
|
150
|
+
# Rails.logger.debug "post_fetch => #{uri_or_path}"
|
141
151
|
response = post(uri_or_path, data, initheader, dest, &block)
|
142
152
|
case response
|
143
153
|
when Net::HTTPSuccess then
|
@@ -152,6 +162,15 @@ module HttpCrawler
|
|
152
162
|
server_error_sleep
|
153
163
|
# 重新请求
|
154
164
|
post_fetch(uri_or_path, initheader, dest, &block)
|
165
|
+
when Net::HTTPProxyAuthenticationRequired then
|
166
|
+
Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{http.proxy_address}:#{http.proxy_port}] =>#{address}"
|
167
|
+
if update_proxy?
|
168
|
+
server_error_sleep
|
169
|
+
# 重新请求
|
170
|
+
post_fetch(uri_or_path, initheader, dest, &block)
|
171
|
+
else
|
172
|
+
response.error!
|
173
|
+
end
|
155
174
|
else
|
156
175
|
server_error_sleep
|
157
176
|
response.error!
|
@@ -166,6 +185,7 @@ module HttpCrawler
|
|
166
185
|
def request(req, body = nil, &block)
|
167
186
|
begin
|
168
187
|
Rails.logger.debug("#{req.class} => #{use_ssl? ? "https://" : "http://" }#{address}:#{port}#{req.path}") if started?
|
188
|
+
Rails.logger.debug("body => #{body}") if started? && body
|
169
189
|
super(req, body, &block)
|
170
190
|
rescue => error
|
171
191
|
if started?
|
data/lib/http_crawler/proxy.rb
CHANGED
data/lib/http_crawler/version.rb
CHANGED
data/lib/http_crawler.rb
CHANGED
@@ -3,9 +3,9 @@ require 'json'
|
|
3
3
|
require 'digest/md5'
|
4
4
|
require 'nokogiri'
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
load 'http_crawler/client.rb'
|
7
|
+
load 'http_crawler/proxy.rb'
|
8
|
+
load 'http_crawler/http.rb'
|
9
9
|
|
10
10
|
module HttpCrawler
|
11
11
|
# Your code goes here...
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2.1
|
4
|
+
version: 0.2.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-12-
|
11
|
+
date: 2018-12-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -116,7 +116,12 @@ extensions: []
|
|
116
116
|
extra_rdoc_files: []
|
117
117
|
files:
|
118
118
|
- ".gitignore"
|
119
|
+
- ".idea/.rakeTasks"
|
120
|
+
- ".idea/http_crawler.iml"
|
121
|
+
- ".idea/misc.xml"
|
122
|
+
- ".idea/modules.xml"
|
119
123
|
- ".idea/vcs.xml"
|
124
|
+
- ".idea/workspace.xml"
|
120
125
|
- ".rspec"
|
121
126
|
- CODE_OF_CONDUCT.md
|
122
127
|
- Gemfile
|