http_crawler 0.2.2.1 → 0.2.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd9558feda79b2d61eb24cde356bd116ffba561f
4
- data.tar.gz: 5ad0e3f3067d2034f3de5da093742b0abbe3481b
3
+ metadata.gz: 32a91911a47a0b39ca19d9655a8a364dc3416bd1
4
+ data.tar.gz: b62145f8d063c9825c5d93f1ec647f16995b8cad
5
5
  SHA512:
6
- metadata.gz: ea2dd05c3bdb2ebfe80f859f1aafe9f1addab311d9a343bea89b5e1d8e0c8958bb9890ab83f9495c5edd872c8b9d1f50c7ddc05b884c090ba48876913c9485cc
7
- data.tar.gz: cc4413f1f01571455991d52e6ad3137f50de0fe63101d1acd3a16c95d772be654f1efca23cf503a0f781a652ea585408351399a974aa66566f98d5a906b98f7a
6
+ metadata.gz: 0e06a1f44aa20c7be935d0b5e0c56ed58c3f5c48bd85df5d2ca193f3174dd827a1df2f0b23d2372969cf182a92b7a6312d0108da1dfc11a880edf7768655fa23
7
+ data.tar.gz: 430f009ad66fd802ddf3cee8e619c2cd1b8047b333af1658b39806af18f23af572b861166d6030a80e4e6743944813b2a2317b6a9b653a9537e24b4d59cf613b
data/.idea/.rakeTasks ADDED
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <Settings><!--This file was automatically generated by Ruby plugin.
3
+ You are allowed to:
4
+ 1. Remove rake task
5
+ 2. Add existing rake tasks
6
+ To add existing rake tasks automatically delete this file and reload the project.
7
+ --><RakeGroup description="" fullCmd="" taksId="rake"><RakeTask description="Build http_crawler-0.1.6.gem into the pkg directory" fullCmd="build" taksId="build" /><RakeTask description="Remove any temporary products" fullCmd="clean" taksId="clean" /><RakeTask description="Remove any generated files" fullCmd="clobber" taksId="clobber" /><RakeTask description="Build and install http_crawler-0.1.6.gem into system gems" fullCmd="install" taksId="install" /><RakeGroup description="" fullCmd="" taksId="install"><RakeTask description="Build and install http_crawler-0.1.6.gem into system gems without network access" fullCmd="install:local" taksId="local" /></RakeGroup><RakeTask description="Create tag v0.1.6 and build and push http_crawler-0.1.6.gem to Rubygems" fullCmd="release[remote]" taksId="release[remote]" /><RakeTask description="" fullCmd="default" taksId="default" /><RakeTask description="" fullCmd="release" taksId="release" /><RakeGroup description="" fullCmd="" taksId="release"><RakeTask description="" fullCmd="release:guard_clean" taksId="guard_clean" /><RakeTask description="" fullCmd="release:rubygem_push" taksId="rubygem_push" /><RakeTask description="" fullCmd="release:source_control_push" taksId="source_control_push" /></RakeGroup></RakeGroup></Settings>
data/.idea/http_crawler.iml ADDED
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="ModuleRunConfigurationManager">
4
+ <shared />
5
+ </component>
6
+ <component name="NewModuleRootManager">
7
+ <content url="file://$MODULE_DIR$" />
8
+ <orderEntry type="inheritedJdk" />
9
+ <orderEntry type="sourceFolder" forTests="false" />
10
+ <orderEntry type="library" scope="PROVIDED" name="bundler (v1.15.1, RVM: ruby-2.4.1) [gem]" level="application" />
11
+ <orderEntry type="library" scope="PROVIDED" name="nokogiri (v1.8.0, RVM: ruby-2.4.1) [gem]" level="application" />
12
+ </component>
13
+ </module>
data/.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="JavaScriptSettings">
4
+ <option name="languageLevel" value="ES6" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="RVM: ruby-2.4.1" project-jdk-type="RUBY_SDK" />
7
+ </project>
data/.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/http_crawler.iml" filepath="$PROJECT_DIR$/.idea/http_crawler.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
data/.idea/workspace.xml ADDED
@@ -0,0 +1,533 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="">
5
+ <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
6
+ <change beforePath="$PROJECT_DIR$/lib/http_crawler/http.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/http.rb" afterDir="false" />
7
+ <change beforePath="$PROJECT_DIR$/lib/http_crawler/version.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/version.rb" afterDir="false" />
8
+ </list>
9
+ <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
10
+ <option name="TRACKING_ENABLED" value="true" />
11
+ <option name="SHOW_DIALOG" value="false" />
12
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
13
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
14
+ <option name="LAST_RESOLUTION" value="IGNORE" />
15
+ </component>
16
+ <component name="FileEditorManager">
17
+ <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
18
+ <file leaf-file-name="version.rb" pinned="false" current-in-tab="true">
19
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
20
+ <provider selected="true" editor-type-id="text-editor">
21
+ <state relative-caret-position="15">
22
+ <caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
23
+ </state>
24
+ </provider>
25
+ </entry>
26
+ </file>
27
+ <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
28
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
29
+ <provider selected="true" editor-type-id="text-editor">
30
+ <state relative-caret-position="30">
31
+ <caret line="2" column="17" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
32
+ </state>
33
+ </provider>
34
+ </entry>
35
+ </file>
36
+ <file leaf-file-name="http_crawler.rb" pinned="false" current-in-tab="false">
37
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
38
+ <provider selected="true" editor-type-id="text-editor">
39
+ <state relative-caret-position="120">
40
+ <caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
41
+ </state>
42
+ </provider>
43
+ </entry>
44
+ </file>
45
+ <file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
46
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
47
+ <provider selected="true" editor-type-id="text-editor">
48
+ <state relative-caret-position="278">
49
+ <caret line="165" column="41" selection-start-line="165" selection-start-column="41" selection-end-line="165" selection-end-column="41" />
50
+ </state>
51
+ </provider>
52
+ </entry>
53
+ </file>
54
+ <file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
55
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
56
+ <provider selected="true" editor-type-id="text-editor">
57
+ <state relative-caret-position="45">
58
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
59
+ </state>
60
+ </provider>
61
+ </entry>
62
+ </file>
63
+ <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
64
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
65
+ <provider selected="true" editor-type-id="text-editor">
66
+ <state relative-caret-position="-99">
67
+ <caret line="26" column="4" selection-start-line="26" selection-start-column="4" selection-end-line="26" selection-end-column="4" />
68
+ </state>
69
+ </provider>
70
+ </entry>
71
+ </file>
72
+ <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
73
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
74
+ <provider selected="true" editor-type-id="text-editor">
75
+ <state relative-caret-position="90">
76
+ <caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
77
+ </state>
78
+ </provider>
79
+ </entry>
80
+ </file>
81
+ <file leaf-file-name="proxy.rb" pinned="false" current-in-tab="false">
82
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
83
+ <provider selected="true" editor-type-id="text-editor">
84
+ <state relative-caret-position="30">
85
+ <caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
86
+ </state>
87
+ </provider>
88
+ </entry>
89
+ </file>
90
+ </leaf>
91
+ </component>
92
+ <component name="FindInProjectRecents">
93
+ <findStrings>
94
+ <find>Crawler::Web</find>
95
+ <find>&quot;Crawler</find>
96
+ </findStrings>
97
+ <replaceStrings>
98
+ <replace>HttpCrawler::Web</replace>
99
+ <replace>HttpCrawler</replace>
100
+ </replaceStrings>
101
+ <dirStrings>
102
+ <dir>$PROJECT_DIR$</dir>
103
+ </dirStrings>
104
+ </component>
105
+ <component name="Git.Settings">
106
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
107
+ </component>
108
+ <component name="IdeDocumentHistory">
109
+ <option name="CHANGED_PATHS">
110
+ <list>
111
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/laofu/client.rb" />
112
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/README.md" />
113
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
114
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb" />
115
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb" />
116
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
117
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/response.rb" />
118
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb" />
119
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response.rb" />
120
+ <option value="$PROJECT_DIR$/lib/http_crawler/web.rb" />
121
+ <option value="$PROJECT_DIR$/README.md" />
122
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb" />
123
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/README.md" />
124
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/README.md" />
125
+ <option value="$PROJECT_DIR$/http_crawler.gemspec" />
126
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/client.rb" />
127
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy.rb" />
128
+ <option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
129
+ <option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
130
+ <option value="$PROJECT_DIR$/lib/test.rb" />
131
+ <option value="$PROJECT_DIR$/lib/http_crawler/test1.rb" />
132
+ <option value="$PROJECT_DIR$/lib/http_crawler.rb" />
133
+ <option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
134
+ <option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
135
+ <option value="$PROJECT_DIR$/lib/http_crawler/http.rb" />
136
+ <option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
137
+ </list>
138
+ </option>
139
+ </component>
140
+ <component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
141
+ <component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
142
+ <component name="JsGulpfileManager">
143
+ <detection-done>true</detection-done>
144
+ <sorting>DEFINITION_ORDER</sorting>
145
+ </component>
146
+ <component name="NodePackageJsonFileManager">
147
+ <packageJsonPaths />
148
+ </component>
149
+ <component name="ProjectFrameBounds" extendedState="6" fullScreen="true">
150
+ <option name="y" value="23" />
151
+ <option name="width" value="1680" />
152
+ <option name="height" value="1027" />
153
+ </component>
154
+ <component name="ProjectView">
155
+ <navigator proportions="" version="1">
156
+ <foldersAlwaysOnTop value="true" />
157
+ </navigator>
158
+ <panes>
159
+ <pane id="ProjectPane">
160
+ <subPane>
161
+ <expand>
162
+ <path>
163
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
164
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
165
+ </path>
166
+ <path>
167
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
168
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
169
+ <item name="lib" type="462c0819:PsiDirectoryNode" />
170
+ </path>
171
+ <path>
172
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
173
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
174
+ <item name="lib" type="462c0819:PsiDirectoryNode" />
175
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
176
+ </path>
177
+ </expand>
178
+ <select />
179
+ </subPane>
180
+ </pane>
181
+ <pane id="Scope" />
182
+ </panes>
183
+ </component>
184
+ <component name="PropertiesComponent">
185
+ <property name="WebServerToolWindowFactoryState" value="false" />
186
+ <property name="last_opened_file_path" value="$PROJECT_DIR$" />
187
+ <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
188
+ <property name="nodejs_npm_path_reset_for_default_project" value="true" />
189
+ </component>
190
+ <component name="RecentsManager">
191
+ <key name="MoveFile.RECENT_KEYS">
192
+ <recent name="$PROJECT_DIR$/lib/http_crawler/common" />
193
+ </key>
194
+ <key name="CopyFile.RECENT_KEYS">
195
+ <recent name="$PROJECT_DIR$/lib/http_crawler" />
196
+ <recent name="$PROJECT_DIR$/lib/http_crawler/web" />
197
+ </key>
198
+ </component>
199
+ <component name="RunDashboard">
200
+ <option name="ruleStates">
201
+ <list>
202
+ <RuleState>
203
+ <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
204
+ </RuleState>
205
+ <RuleState>
206
+ <option name="name" value="StatusDashboardGroupingRule" />
207
+ </RuleState>
208
+ </list>
209
+ </option>
210
+ </component>
211
+ <component name="SpringUtil" SPRING_PRE_LOADER_OPTION="true" />
212
+ <component name="SvnConfiguration">
213
+ <configuration />
214
+ </component>
215
+ <component name="TaskManager">
216
+ <task active="true" id="Default" summary="Default task">
217
+ <changelist id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="" />
218
+ <created>1545966039594</created>
219
+ <option name="number" value="Default" />
220
+ <option name="presentableId" value="Default" />
221
+ <updated>1545966039594</updated>
222
+ <workItem from="1545966041001" duration="9181000" />
223
+ <workItem from="1546164127129" duration="7006000" />
224
+ </task>
225
+ <servers />
226
+ </component>
227
+ <component name="TimeTrackingManager">
228
+ <option name="totallyTimeSpent" value="16187000" />
229
+ </component>
230
+ <component name="ToolWindowManager">
231
+ <frame x="0" y="0" width="1680" height="1050" extended-state="6" />
232
+ <editor active="true" />
233
+ <layout>
234
+ <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.24603175" />
235
+ <window_info anchor="bottom" id="TODO" order="6" />
236
+ <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
237
+ <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
238
+ <window_info anchor="right" id="Database" order="3" />
239
+ <window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
240
+ <window_info anchor="bottom" id="Version Control" order="7" />
241
+ <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
242
+ <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.32902583" />
243
+ <window_info id="Favorites" order="2" side_tool="true" />
244
+ <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
245
+ <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
246
+ <window_info anchor="right" id="Commander" order="0" weight="0.4" />
247
+ <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
248
+ <window_info anchor="bottom" id="Run" order="2" />
249
+ <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
250
+ <window_info anchor="bottom" id="Message" order="0" />
251
+ <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
252
+ <window_info anchor="bottom" id="Find" order="1" />
253
+ </layout>
254
+ </component>
255
+ <component name="TypeScriptGeneratedFilesManager">
256
+ <option name="version" value="1" />
257
+ </component>
258
+ <component name="VcsContentAnnotationSettings">
259
+ <option name="myLimit" value="2678400000" />
260
+ </component>
261
+ <component name="editorHistoryManager">
262
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
263
+ <provider selected="true" editor-type-id="text-editor">
264
+ <state relative-caret-position="45">
265
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
266
+ </state>
267
+ </provider>
268
+ </entry>
269
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
270
+ <provider selected="true" editor-type-id="text-editor">
271
+ <state relative-caret-position="30">
272
+ <caret line="2" column="17" lean-forward="true" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
273
+ </state>
274
+ </provider>
275
+ </entry>
276
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
277
+ <provider selected="true" editor-type-id="text-editor">
278
+ <state relative-caret-position="120">
279
+ <caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
280
+ </state>
281
+ </provider>
282
+ </entry>
283
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
284
+ <provider selected="true" editor-type-id="text-editor">
285
+ <state>
286
+ <caret column="4" selection-start-column="4" selection-end-column="4" />
287
+ </state>
288
+ </provider>
289
+ </entry>
290
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
291
+ <provider selected="true" editor-type-id="text-editor">
292
+ <state relative-caret-position="45">
293
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
294
+ </state>
295
+ </provider>
296
+ </entry>
297
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
298
+ <provider selected="true" editor-type-id="text-editor">
299
+ <state relative-caret-position="375">
300
+ <caret line="25" lean-forward="true" selection-start-line="25" selection-end-line="25" />
301
+ </state>
302
+ </provider>
303
+ </entry>
304
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
305
+ <provider selected="true" editor-type-id="text-editor">
306
+ <state relative-caret-position="90">
307
+ <caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
308
+ </state>
309
+ </provider>
310
+ </entry>
311
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
312
+ <provider selected="true" editor-type-id="text-editor">
313
+ <state relative-caret-position="30">
314
+ <caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
315
+ </state>
316
+ </provider>
317
+ </entry>
318
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/drive.rb" />
319
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/README.md">
320
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
321
+ <state split_layout="SPLIT">
322
+ <first_editor relative-caret-position="45">
323
+ <caret line="3" column="11" selection-start-line="3" selection-start-column="11" selection-end-line="3" selection-end-column="11" />
324
+ </first_editor>
325
+ <second_editor />
326
+ </state>
327
+ </provider>
328
+ </entry>
329
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/response.rb">
330
+ <provider selected="true" editor-type-id="text-editor">
331
+ <state relative-caret-position="30">
332
+ <caret line="2" column="18" selection-start-line="2" selection-start-column="18" selection-end-line="2" selection-end-column="18" />
333
+ </state>
334
+ </provider>
335
+ </entry>
336
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/web.rb">
337
+ <provider selected="true" editor-type-id="text-editor">
338
+ <state relative-caret-position="30">
339
+ <caret line="2" column="18" selection-start-line="2" selection-start-column="18" selection-end-line="2" selection-end-column="18" />
340
+ </state>
341
+ </provider>
342
+ </entry>
343
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
344
+ <provider selected="true" editor-type-id="text-editor" />
345
+ </entry>
346
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
347
+ <provider selected="true" editor-type-id="text-editor" />
348
+ </entry>
349
+ <entry file="file://$PROJECT_DIR$/README.md">
350
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
351
+ <state split_layout="SPLIT">
352
+ <first_editor relative-caret-position="255">
353
+ <caret line="17" lean-forward="true" selection-start-line="17" selection-end-line="17" />
354
+ </first_editor>
355
+ <second_editor />
356
+ </state>
357
+ </provider>
358
+ </entry>
359
+ <entry file="file://$PROJECT_DIR$/CODE_OF_CONDUCT.md">
360
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
361
+ <state split_layout="SPLIT">
362
+ <first_editor relative-caret-position="1110">
363
+ <caret line="74" lean-forward="true" selection-start-line="74" selection-end-line="74" />
364
+ </first_editor>
365
+ <second_editor />
366
+ </state>
367
+ </provider>
368
+ </entry>
369
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
370
+ <provider selected="true" editor-type-id="text-editor" />
371
+ </entry>
372
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
373
+ <provider selected="true" editor-type-id="text-editor">
374
+ <state relative-caret-position="15">
375
+ <caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
376
+ </state>
377
+ </provider>
378
+ </entry>
379
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb">
380
+ <provider selected="true" editor-type-id="text-editor">
381
+ <state relative-caret-position="15">
382
+ <caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
383
+ </state>
384
+ </provider>
385
+ </entry>
386
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb">
387
+ <provider selected="true" editor-type-id="text-editor">
388
+ <state relative-caret-position="120">
389
+ <caret line="8" column="7" lean-forward="true" selection-start-line="8" selection-start-column="7" selection-end-line="8" selection-end-column="7" />
390
+ </state>
391
+ </provider>
392
+ </entry>
393
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb">
394
+ <provider selected="true" editor-type-id="text-editor">
395
+ <state relative-caret-position="15">
396
+ <caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
397
+ </state>
398
+ </provider>
399
+ </entry>
400
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/README.md">
401
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
402
+ <state split_layout="SPLIT">
403
+ <first_editor relative-caret-position="240">
404
+ <caret line="16" lean-forward="true" selection-start-line="16" selection-end-line="18" selection-end-column="15" />
405
+ </first_editor>
406
+ <second_editor />
407
+ </state>
408
+ </provider>
409
+ </entry>
410
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/README.md">
411
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
412
+ <state split_layout="SPLIT">
413
+ <first_editor relative-caret-position="45">
414
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
415
+ </first_editor>
416
+ <second_editor />
417
+ </state>
418
+ </provider>
419
+ </entry>
420
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/README.md">
421
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
422
+ <state split_layout="SPLIT">
423
+ <first_editor relative-caret-position="60">
424
+ <caret line="4" lean-forward="true" selection-start-line="4" selection-end-line="4" />
425
+ </first_editor>
426
+ <second_editor />
427
+ </state>
428
+ </provider>
429
+ </entry>
430
+ <entry file="file://$PROJECT_DIR$/http_crawler.gemspec">
431
+ <provider selected="true" editor-type-id="text-editor">
432
+ <state relative-caret-position="330">
433
+ <caret line="22" column="26" lean-forward="true" selection-start-line="22" selection-start-column="26" selection-end-line="22" selection-end-column="26" />
434
+ </state>
435
+ </provider>
436
+ </entry>
437
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/response.rb">
438
+ <provider selected="true" editor-type-id="text-editor">
439
+ <state relative-caret-position="15">
440
+ <caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
441
+ </state>
442
+ </provider>
443
+ </entry>
444
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
445
+ <provider selected="true" editor-type-id="text-editor">
446
+ <state relative-caret-position="75">
447
+ <caret line="5" lean-forward="true" selection-start-line="5" selection-end-line="5" />
448
+ </state>
449
+ </provider>
450
+ </entry>
451
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
452
+ <provider selected="true" editor-type-id="text-editor">
453
+ <state>
454
+ <caret column="18" selection-start-column="7" selection-end-column="18" />
455
+ </state>
456
+ </provider>
457
+ </entry>
458
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
459
+ <provider selected="true" editor-type-id="text-editor">
460
+ <state relative-caret-position="90">
461
+ <caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
462
+ </state>
463
+ </provider>
464
+ </entry>
465
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
466
+ <provider selected="true" editor-type-id="text-editor">
467
+ <state relative-caret-position="30">
468
+ <caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
469
+ </state>
470
+ </provider>
471
+ </entry>
472
+ <entry file="file://$PROJECT_DIR$/lib/test.rb" />
473
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/test1.rb" />
474
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/test.rb" />
475
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/test2.rb" />
476
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
477
+ <provider selected="true" editor-type-id="text-editor">
478
+ <state relative-caret-position="45">
479
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
480
+ </state>
481
+ </provider>
482
+ </entry>
483
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
484
+ <provider selected="true" editor-type-id="text-editor">
485
+ <state relative-caret-position="30">
486
+ <caret line="2" column="17" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
487
+ </state>
488
+ </provider>
489
+ </entry>
490
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
491
+ <provider selected="true" editor-type-id="text-editor">
492
+ <state relative-caret-position="120">
493
+ <caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
494
+ </state>
495
+ </provider>
496
+ </entry>
497
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/net/http/responses.rb">
498
+ <provider selected="true" editor-type-id="text-editor">
499
+ <state relative-caret-position="215">
500
+ <caret line="113" column="24" lean-forward="true" selection-start-line="113" selection-start-column="24" selection-end-line="113" selection-end-column="24" />
501
+ </state>
502
+ </provider>
503
+ </entry>
504
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
505
+ <provider selected="true" editor-type-id="text-editor">
506
+ <state relative-caret-position="-99">
507
+ <caret line="26" column="4" selection-start-line="26" selection-start-column="4" selection-end-line="26" selection-end-column="4" />
508
+ </state>
509
+ </provider>
510
+ </entry>
511
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/net/http.rb">
512
+ <provider selected="true" editor-type-id="text-editor">
513
+ <state relative-caret-position="15">
514
+ <caret line="1065" column="8" selection-start-line="1065" selection-start-column="8" selection-end-line="1065" selection-end-column="8" />
515
+ </state>
516
+ </provider>
517
+ </entry>
518
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
519
+ <provider selected="true" editor-type-id="text-editor">
520
+ <state relative-caret-position="278">
521
+ <caret line="165" column="41" selection-start-line="165" selection-start-column="41" selection-end-line="165" selection-end-column="41" />
522
+ </state>
523
+ </provider>
524
+ </entry>
525
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
526
+ <provider selected="true" editor-type-id="text-editor">
527
+ <state relative-caret-position="15">
528
+ <caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
529
+ </state>
530
+ </provider>
531
+ </entry>
532
+ </component>
533
+ </project>
data/lib/http_crawler/client.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- require File.dirname(__FILE__) + '/web/client.rb'
2
+ load File.dirname(__FILE__) + '/web/client.rb'
3
3
 
4
4
  module HttpCrawler
5
5
  module Client
data/lib/http_crawler/common.rb CHANGED
@@ -1,3 +1,3 @@
1
- require File.dirname(__FILE__) + '/http.rb'
2
- require File.dirname(__FILE__) + '/object.rb'
3
- require File.dirname(__FILE__) + '/string.rb'
1
+ load File.dirname(__FILE__) + '/http.rb'
2
+ load File.dirname(__FILE__) + '/object.rb'
3
+ load File.dirname(__FILE__) + '/string.rb'
data/lib/http_crawler/http.rb CHANGED
@@ -1,5 +1,5 @@
1
- require File.dirname(__FILE__) + '/net/http.rb'
2
- require File.dirname(__FILE__) + '/net/response.rb'
1
+ load File.dirname(__FILE__) + '/net/http.rb'
2
+ load File.dirname(__FILE__) + '/net/response.rb'
3
3
 
4
4
  module HttpCrawler
5
5
  class HTTP < Net::HTTP
@@ -31,6 +31,7 @@ module HttpCrawler
31
31
  def proxy_api
32
32
  @proxy_api ||= "my"
33
33
  end
34
+
34
35
  @@proxy_list = []
35
36
  # 为 @http 重设代理
36
37
  def proxy(p = {})
@@ -127,6 +128,15 @@ module HttpCrawler
127
128
  server_error_sleep
128
129
  # 重新请求
129
130
  get_fetch(uri_or_path, initheader, dest, &block)
131
+ when Net::HTTPProxyAuthenticationRequired then
132
+ Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{@proxy_address}:#{@proxy_port}] =>#{address}"
133
+ if update_proxy?
134
+ server_error_sleep
135
+ # 重新请求
136
+ get_fetch(uri_or_path, initheader, dest, &block)
137
+ else
138
+ response.error!
139
+ end
130
140
  else
131
141
  server_error_sleep
132
142
  response.error!
@@ -137,7 +147,7 @@ module HttpCrawler
137
147
  def post_fetch(uri_or_path, data, initheader = nil, dest = nil, &block)
138
148
  # 更新uri_or_path 如果 uri_or_path 是 String类型 同时 又不是 ascii编码格式就进行转码
139
149
  uri_or_path = URI.encode(uri_or_path) if String === uri_or_path && CharDet.detect(uri_or_path)["encoding"] != "ascii"
140
- Rails.logger.debug "post_fetch => #{uri_or_path}"
150
+ # Rails.logger.debug "post_fetch => #{uri_or_path}"
141
151
  response = post(uri_or_path, data, initheader, dest, &block)
142
152
  case response
143
153
  when Net::HTTPSuccess then
@@ -152,6 +162,15 @@ module HttpCrawler
152
162
  server_error_sleep
153
163
  # 重新请求
154
164
  post_fetch(uri_or_path, initheader, dest, &block)
165
+ when Net::HTTPProxyAuthenticationRequired then
166
+ Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{http.proxy_address}:#{http.proxy_port}] =>#{address}"
167
+ if update_proxy?
168
+ server_error_sleep
169
+ # 重新请求
170
+ post_fetch(uri_or_path, initheader, dest, &block)
171
+ else
172
+ response.error!
173
+ end
155
174
  else
156
175
  server_error_sleep
157
176
  response.error!
@@ -166,6 +185,7 @@ module HttpCrawler
166
185
  def request(req, body = nil, &block)
167
186
  begin
168
187
  Rails.logger.debug("#{req.class} => #{use_ssl? ? "https://" : "http://" }#{address}:#{port}#{req.path}") if started?
188
+ Rails.logger.debug("body => #{body}") if started? && body
169
189
  super(req, body, &block)
170
190
  rescue => error
171
191
  if started?
data/lib/http_crawler/proxy.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
 
3
- require File.dirname(__FILE__) + '/proxy/client.rb'
3
+ load File.dirname(__FILE__) + '/proxy/client.rb'
4
4
 
5
5
  module HttpCrawler
6
6
  module Proxy
data/lib/http_crawler/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.2.2.1"
2
+ VERSION = "0.2.2.3"
3
3
  end
data/lib/http_crawler.rb CHANGED
@@ -3,9 +3,9 @@ require 'json'
3
3
  require 'digest/md5'
4
4
  require 'nokogiri'
5
5
 
6
- require 'http_crawler/client'
7
- require 'http_crawler/proxy'
8
- require 'http_crawler/http'
6
+ load 'http_crawler/client.rb'
7
+ load 'http_crawler/proxy.rb'
8
+ load 'http_crawler/http.rb'
9
9
 
10
10
  module HttpCrawler
11
11
  # Your code goes here...
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2.1
4
+ version: 0.2.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-28 00:00:00.000000000 Z
11
+ date: 2018-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -116,7 +116,12 @@ extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
118
  - ".gitignore"
119
+ - ".idea/.rakeTasks"
120
+ - ".idea/http_crawler.iml"
121
+ - ".idea/misc.xml"
122
+ - ".idea/modules.xml"
119
123
  - ".idea/vcs.xml"
124
+ - ".idea/workspace.xml"
120
125
  - ".rspec"
121
126
  - CODE_OF_CONDUCT.md
122
127
  - Gemfile