http_crawler 0.2.2.1 → 0.2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd9558feda79b2d61eb24cde356bd116ffba561f
4
- data.tar.gz: 5ad0e3f3067d2034f3de5da093742b0abbe3481b
3
+ metadata.gz: 32a91911a47a0b39ca19d9655a8a364dc3416bd1
4
+ data.tar.gz: b62145f8d063c9825c5d93f1ec647f16995b8cad
5
5
  SHA512:
6
- metadata.gz: ea2dd05c3bdb2ebfe80f859f1aafe9f1addab311d9a343bea89b5e1d8e0c8958bb9890ab83f9495c5edd872c8b9d1f50c7ddc05b884c090ba48876913c9485cc
7
- data.tar.gz: cc4413f1f01571455991d52e6ad3137f50de0fe63101d1acd3a16c95d772be654f1efca23cf503a0f781a652ea585408351399a974aa66566f98d5a906b98f7a
6
+ metadata.gz: 0e06a1f44aa20c7be935d0b5e0c56ed58c3f5c48bd85df5d2ca193f3174dd827a1df2f0b23d2372969cf182a92b7a6312d0108da1dfc11a880edf7768655fa23
7
+ data.tar.gz: 430f009ad66fd802ddf3cee8e619c2cd1b8047b333af1658b39806af18f23af572b861166d6030a80e4e6743944813b2a2317b6a9b653a9537e24b4d59cf613b
data/.idea/.rakeTasks ADDED
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <Settings><!--This file was automatically generated by Ruby plugin.
3
+ You are allowed to:
4
+ 1. Remove rake task
5
+ 2. Add existing rake tasks
6
+ To add existing rake tasks automatically delete this file and reload the project.
7
+ --><RakeGroup description="" fullCmd="" taksId="rake"><RakeTask description="Build http_crawler-0.1.6.gem into the pkg directory" fullCmd="build" taksId="build" /><RakeTask description="Remove any temporary products" fullCmd="clean" taksId="clean" /><RakeTask description="Remove any generated files" fullCmd="clobber" taksId="clobber" /><RakeTask description="Build and install http_crawler-0.1.6.gem into system gems" fullCmd="install" taksId="install" /><RakeGroup description="" fullCmd="" taksId="install"><RakeTask description="Build and install http_crawler-0.1.6.gem into system gems without network access" fullCmd="install:local" taksId="local" /></RakeGroup><RakeTask description="Create tag v0.1.6 and build and push http_crawler-0.1.6.gem to Rubygems" fullCmd="release[remote]" taksId="release[remote]" /><RakeTask description="" fullCmd="default" taksId="default" /><RakeTask description="" fullCmd="release" taksId="release" /><RakeGroup description="" fullCmd="" taksId="release"><RakeTask description="" fullCmd="release:guard_clean" taksId="guard_clean" /><RakeTask description="" fullCmd="release:rubygem_push" taksId="rubygem_push" /><RakeTask description="" fullCmd="release:source_control_push" taksId="source_control_push" /></RakeGroup></RakeGroup></Settings>
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="ModuleRunConfigurationManager">
4
+ <shared />
5
+ </component>
6
+ <component name="NewModuleRootManager">
7
+ <content url="file://$MODULE_DIR$" />
8
+ <orderEntry type="inheritedJdk" />
9
+ <orderEntry type="sourceFolder" forTests="false" />
10
+ <orderEntry type="library" scope="PROVIDED" name="bundler (v1.15.1, RVM: ruby-2.4.1) [gem]" level="application" />
11
+ <orderEntry type="library" scope="PROVIDED" name="nokogiri (v1.8.0, RVM: ruby-2.4.1) [gem]" level="application" />
12
+ </component>
13
+ </module>
data/.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="JavaScriptSettings">
4
+ <option name="languageLevel" value="ES6" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="RVM: ruby-2.4.1" project-jdk-type="RUBY_SDK" />
7
+ </project>
data/.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/http_crawler.iml" filepath="$PROJECT_DIR$/.idea/http_crawler.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
@@ -0,0 +1,533 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="">
5
+ <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
6
+ <change beforePath="$PROJECT_DIR$/lib/http_crawler/http.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/http.rb" afterDir="false" />
7
+ <change beforePath="$PROJECT_DIR$/lib/http_crawler/version.rb" beforeDir="false" afterPath="$PROJECT_DIR$/lib/http_crawler/version.rb" afterDir="false" />
8
+ </list>
9
+ <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
10
+ <option name="TRACKING_ENABLED" value="true" />
11
+ <option name="SHOW_DIALOG" value="false" />
12
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
13
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
14
+ <option name="LAST_RESOLUTION" value="IGNORE" />
15
+ </component>
16
+ <component name="FileEditorManager">
17
+ <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
18
+ <file leaf-file-name="version.rb" pinned="false" current-in-tab="true">
19
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
20
+ <provider selected="true" editor-type-id="text-editor">
21
+ <state relative-caret-position="15">
22
+ <caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
23
+ </state>
24
+ </provider>
25
+ </entry>
26
+ </file>
27
+ <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
28
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
29
+ <provider selected="true" editor-type-id="text-editor">
30
+ <state relative-caret-position="30">
31
+ <caret line="2" column="17" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
32
+ </state>
33
+ </provider>
34
+ </entry>
35
+ </file>
36
+ <file leaf-file-name="http_crawler.rb" pinned="false" current-in-tab="false">
37
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
38
+ <provider selected="true" editor-type-id="text-editor">
39
+ <state relative-caret-position="120">
40
+ <caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
41
+ </state>
42
+ </provider>
43
+ </entry>
44
+ </file>
45
+ <file leaf-file-name="http.rb" pinned="false" current-in-tab="false">
46
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
47
+ <provider selected="true" editor-type-id="text-editor">
48
+ <state relative-caret-position="278">
49
+ <caret line="165" column="41" selection-start-line="165" selection-start-column="41" selection-end-line="165" selection-end-column="41" />
50
+ </state>
51
+ </provider>
52
+ </entry>
53
+ </file>
54
+ <file leaf-file-name="common.rb" pinned="false" current-in-tab="false">
55
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
56
+ <provider selected="true" editor-type-id="text-editor">
57
+ <state relative-caret-position="45">
58
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
59
+ </state>
60
+ </provider>
61
+ </entry>
62
+ </file>
63
+ <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
64
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
65
+ <provider selected="true" editor-type-id="text-editor">
66
+ <state relative-caret-position="-99">
67
+ <caret line="26" column="4" selection-start-line="26" selection-start-column="4" selection-end-line="26" selection-end-column="4" />
68
+ </state>
69
+ </provider>
70
+ </entry>
71
+ </file>
72
+ <file leaf-file-name="client.rb" pinned="false" current-in-tab="false">
73
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
74
+ <provider selected="true" editor-type-id="text-editor">
75
+ <state relative-caret-position="90">
76
+ <caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
77
+ </state>
78
+ </provider>
79
+ </entry>
80
+ </file>
81
+ <file leaf-file-name="proxy.rb" pinned="false" current-in-tab="false">
82
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
83
+ <provider selected="true" editor-type-id="text-editor">
84
+ <state relative-caret-position="30">
85
+ <caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
86
+ </state>
87
+ </provider>
88
+ </entry>
89
+ </file>
90
+ </leaf>
91
+ </component>
92
+ <component name="FindInProjectRecents">
93
+ <findStrings>
94
+ <find>Crawler::Web</find>
95
+ <find>&quot;Crawler</find>
96
+ </findStrings>
97
+ <replaceStrings>
98
+ <replace>HttpCrawler::Web</replace>
99
+ <replace>HttpCrawler</replace>
100
+ </replaceStrings>
101
+ <dirStrings>
102
+ <dir>$PROJECT_DIR$</dir>
103
+ </dirStrings>
104
+ </component>
105
+ <component name="Git.Settings">
106
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
107
+ </component>
108
+ <component name="IdeDocumentHistory">
109
+ <option name="CHANGED_PATHS">
110
+ <list>
111
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/laofu/client.rb" />
112
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/README.md" />
113
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb" />
114
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb" />
115
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb" />
116
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/client.rb" />
117
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/response.rb" />
118
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb" />
119
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/response.rb" />
120
+ <option value="$PROJECT_DIR$/lib/http_crawler/web.rb" />
121
+ <option value="$PROJECT_DIR$/README.md" />
122
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb" />
123
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/baidu/README.md" />
124
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/README.md" />
125
+ <option value="$PROJECT_DIR$/http_crawler.gemspec" />
126
+ <option value="$PROJECT_DIR$/lib/http_crawler/web/client.rb" />
127
+ <option value="$PROJECT_DIR$/lib/http_crawler/proxy.rb" />
128
+ <option value="$PROJECT_DIR$/lib/http_crawler/common.rb" />
129
+ <option value="$PROJECT_DIR$/lib/http_crawler/client.rb" />
130
+ <option value="$PROJECT_DIR$/lib/test.rb" />
131
+ <option value="$PROJECT_DIR$/lib/http_crawler/test1.rb" />
132
+ <option value="$PROJECT_DIR$/lib/http_crawler.rb" />
133
+ <option value="$PROJECT_DIR$/lib/http_crawler/test.rb" />
134
+ <option value="$PROJECT_DIR$/lib/http_crawler/test2.rb" />
135
+ <option value="$PROJECT_DIR$/lib/http_crawler/http.rb" />
136
+ <option value="$PROJECT_DIR$/lib/http_crawler/version.rb" />
137
+ </list>
138
+ </option>
139
+ </component>
140
+ <component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
141
+ <component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
142
+ <component name="JsGulpfileManager">
143
+ <detection-done>true</detection-done>
144
+ <sorting>DEFINITION_ORDER</sorting>
145
+ </component>
146
+ <component name="NodePackageJsonFileManager">
147
+ <packageJsonPaths />
148
+ </component>
149
+ <component name="ProjectFrameBounds" extendedState="6" fullScreen="true">
150
+ <option name="y" value="23" />
151
+ <option name="width" value="1680" />
152
+ <option name="height" value="1027" />
153
+ </component>
154
+ <component name="ProjectView">
155
+ <navigator proportions="" version="1">
156
+ <foldersAlwaysOnTop value="true" />
157
+ </navigator>
158
+ <panes>
159
+ <pane id="ProjectPane">
160
+ <subPane>
161
+ <expand>
162
+ <path>
163
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
164
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
165
+ </path>
166
+ <path>
167
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
168
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
169
+ <item name="lib" type="462c0819:PsiDirectoryNode" />
170
+ </path>
171
+ <path>
172
+ <item name="http_crawler" type="b2602c69:ProjectViewProjectNode" />
173
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
174
+ <item name="lib" type="462c0819:PsiDirectoryNode" />
175
+ <item name="http_crawler" type="462c0819:PsiDirectoryNode" />
176
+ </path>
177
+ </expand>
178
+ <select />
179
+ </subPane>
180
+ </pane>
181
+ <pane id="Scope" />
182
+ </panes>
183
+ </component>
184
+ <component name="PropertiesComponent">
185
+ <property name="WebServerToolWindowFactoryState" value="false" />
186
+ <property name="last_opened_file_path" value="$PROJECT_DIR$" />
187
+ <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
188
+ <property name="nodejs_npm_path_reset_for_default_project" value="true" />
189
+ </component>
190
+ <component name="RecentsManager">
191
+ <key name="MoveFile.RECENT_KEYS">
192
+ <recent name="$PROJECT_DIR$/lib/http_crawler/common" />
193
+ </key>
194
+ <key name="CopyFile.RECENT_KEYS">
195
+ <recent name="$PROJECT_DIR$/lib/http_crawler" />
196
+ <recent name="$PROJECT_DIR$/lib/http_crawler/web" />
197
+ </key>
198
+ </component>
199
+ <component name="RunDashboard">
200
+ <option name="ruleStates">
201
+ <list>
202
+ <RuleState>
203
+ <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
204
+ </RuleState>
205
+ <RuleState>
206
+ <option name="name" value="StatusDashboardGroupingRule" />
207
+ </RuleState>
208
+ </list>
209
+ </option>
210
+ </component>
211
+ <component name="SpringUtil" SPRING_PRE_LOADER_OPTION="true" />
212
+ <component name="SvnConfiguration">
213
+ <configuration />
214
+ </component>
215
+ <component name="TaskManager">
216
+ <task active="true" id="Default" summary="Default task">
217
+ <changelist id="07223dd4-8944-486b-a29b-7461a5c9ec2d" name="Default" comment="" />
218
+ <created>1545966039594</created>
219
+ <option name="number" value="Default" />
220
+ <option name="presentableId" value="Default" />
221
+ <updated>1545966039594</updated>
222
+ <workItem from="1545966041001" duration="9181000" />
223
+ <workItem from="1546164127129" duration="7006000" />
224
+ </task>
225
+ <servers />
226
+ </component>
227
+ <component name="TimeTrackingManager">
228
+ <option name="totallyTimeSpent" value="16187000" />
229
+ </component>
230
+ <component name="ToolWindowManager">
231
+ <frame x="0" y="0" width="1680" height="1050" extended-state="6" />
232
+ <editor active="true" />
233
+ <layout>
234
+ <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.24603175" />
235
+ <window_info anchor="bottom" id="TODO" order="6" />
236
+ <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
237
+ <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
238
+ <window_info anchor="right" id="Database" order="3" />
239
+ <window_info anchor="bottom" id="Database Changes" order="7" show_stripe_button="false" />
240
+ <window_info anchor="bottom" id="Version Control" order="7" />
241
+ <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
242
+ <window_info anchor="bottom" id="Terminal" order="7" visible="true" weight="0.32902583" />
243
+ <window_info id="Favorites" order="2" side_tool="true" />
244
+ <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
245
+ <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
246
+ <window_info anchor="right" id="Commander" order="0" weight="0.4" />
247
+ <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
248
+ <window_info anchor="bottom" id="Run" order="2" />
249
+ <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
250
+ <window_info anchor="bottom" id="Message" order="0" />
251
+ <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
252
+ <window_info anchor="bottom" id="Find" order="1" />
253
+ </layout>
254
+ </component>
255
+ <component name="TypeScriptGeneratedFilesManager">
256
+ <option name="version" value="1" />
257
+ </component>
258
+ <component name="VcsContentAnnotationSettings">
259
+ <option name="myLimit" value="2678400000" />
260
+ </component>
261
+ <component name="editorHistoryManager">
262
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
263
+ <provider selected="true" editor-type-id="text-editor">
264
+ <state relative-caret-position="45">
265
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
266
+ </state>
267
+ </provider>
268
+ </entry>
269
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
270
+ <provider selected="true" editor-type-id="text-editor">
271
+ <state relative-caret-position="30">
272
+ <caret line="2" column="17" lean-forward="true" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
273
+ </state>
274
+ </provider>
275
+ </entry>
276
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
277
+ <provider selected="true" editor-type-id="text-editor">
278
+ <state relative-caret-position="120">
279
+ <caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
280
+ </state>
281
+ </provider>
282
+ </entry>
283
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
284
+ <provider selected="true" editor-type-id="text-editor">
285
+ <state>
286
+ <caret column="4" selection-start-column="4" selection-end-column="4" />
287
+ </state>
288
+ </provider>
289
+ </entry>
290
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
291
+ <provider selected="true" editor-type-id="text-editor">
292
+ <state relative-caret-position="45">
293
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
294
+ </state>
295
+ </provider>
296
+ </entry>
297
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
298
+ <provider selected="true" editor-type-id="text-editor">
299
+ <state relative-caret-position="375">
300
+ <caret line="25" lean-forward="true" selection-start-line="25" selection-end-line="25" />
301
+ </state>
302
+ </provider>
303
+ </entry>
304
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
305
+ <provider selected="true" editor-type-id="text-editor">
306
+ <state relative-caret-position="90">
307
+ <caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
308
+ </state>
309
+ </provider>
310
+ </entry>
311
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
312
+ <provider selected="true" editor-type-id="text-editor">
313
+ <state relative-caret-position="30">
314
+ <caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
315
+ </state>
316
+ </provider>
317
+ </entry>
318
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/drive.rb" />
319
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/README.md">
320
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
321
+ <state split_layout="SPLIT">
322
+ <first_editor relative-caret-position="45">
323
+ <caret line="3" column="11" selection-start-line="3" selection-start-column="11" selection-end-line="3" selection-end-column="11" />
324
+ </first_editor>
325
+ <second_editor />
326
+ </state>
327
+ </provider>
328
+ </entry>
329
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/response.rb">
330
+ <provider selected="true" editor-type-id="text-editor">
331
+ <state relative-caret-position="30">
332
+ <caret line="2" column="18" selection-start-line="2" selection-start-column="18" selection-end-line="2" selection-end-column="18" />
333
+ </state>
334
+ </provider>
335
+ </entry>
336
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/web.rb">
337
+ <provider selected="true" editor-type-id="text-editor">
338
+ <state relative-caret-position="30">
339
+ <caret line="2" column="18" selection-start-line="2" selection-start-column="18" selection-end-line="2" selection-end-column="18" />
340
+ </state>
341
+ </provider>
342
+ </entry>
343
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/string.rb">
344
+ <provider selected="true" editor-type-id="text-editor" />
345
+ </entry>
346
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common/object.rb">
347
+ <provider selected="true" editor-type-id="text-editor" />
348
+ </entry>
349
+ <entry file="file://$PROJECT_DIR$/README.md">
350
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
351
+ <state split_layout="SPLIT">
352
+ <first_editor relative-caret-position="255">
353
+ <caret line="17" lean-forward="true" selection-start-line="17" selection-end-line="17" />
354
+ </first_editor>
355
+ <second_editor />
356
+ </state>
357
+ </provider>
358
+ </entry>
359
+ <entry file="file://$PROJECT_DIR$/CODE_OF_CONDUCT.md">
360
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
361
+ <state split_layout="SPLIT">
362
+ <first_editor relative-caret-position="1110">
363
+ <caret line="74" lean-forward="true" selection-start-line="74" selection-end-line="74" />
364
+ </first_editor>
365
+ <second_editor />
366
+ </state>
367
+ </provider>
368
+ </entry>
369
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/net/http.rb">
370
+ <provider selected="true" editor-type-id="text-editor" />
371
+ </entry>
372
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/net/response.rb">
373
+ <provider selected="true" editor-type-id="text-editor">
374
+ <state relative-caret-position="15">
375
+ <caret line="6" column="45" lean-forward="true" selection-start-line="6" selection-start-column="45" selection-end-line="6" selection-end-column="45" />
376
+ </state>
377
+ </provider>
378
+ </entry>
379
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response/get_proxy.rb">
380
+ <provider selected="true" editor-type-id="text-editor">
381
+ <state relative-caret-position="15">
382
+ <caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
383
+ </state>
384
+ </provider>
385
+ </entry>
386
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/response.rb">
387
+ <provider selected="true" editor-type-id="text-editor">
388
+ <state relative-caret-position="120">
389
+ <caret line="8" column="7" lean-forward="true" selection-start-line="8" selection-start-column="7" selection-end-line="8" selection-end-column="7" />
390
+ </state>
391
+ </provider>
392
+ </entry>
393
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/response/index.rb">
394
+ <provider selected="true" editor-type-id="text-editor">
395
+ <state relative-caret-position="15">
396
+ <caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
397
+ </state>
398
+ </provider>
399
+ </entry>
400
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/README.md">
401
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
402
+ <state split_layout="SPLIT">
403
+ <first_editor relative-caret-position="240">
404
+ <caret line="16" lean-forward="true" selection-start-line="16" selection-end-line="18" selection-end-column="15" />
405
+ </first_editor>
406
+ <second_editor />
407
+ </state>
408
+ </provider>
409
+ </entry>
410
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/README.md">
411
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
412
+ <state split_layout="SPLIT">
413
+ <first_editor relative-caret-position="45">
414
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
415
+ </first_editor>
416
+ <second_editor />
417
+ </state>
418
+ </provider>
419
+ </entry>
420
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/README.md">
421
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
422
+ <state split_layout="SPLIT">
423
+ <first_editor relative-caret-position="60">
424
+ <caret line="4" lean-forward="true" selection-start-line="4" selection-end-line="4" />
425
+ </first_editor>
426
+ <second_editor />
427
+ </state>
428
+ </provider>
429
+ </entry>
430
+ <entry file="file://$PROJECT_DIR$/http_crawler.gemspec">
431
+ <provider selected="true" editor-type-id="text-editor">
432
+ <state relative-caret-position="330">
433
+ <caret line="22" column="26" lean-forward="true" selection-start-line="22" selection-start-column="26" selection-end-line="22" selection-end-column="26" />
434
+ </state>
435
+ </provider>
436
+ </entry>
437
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/response.rb">
438
+ <provider selected="true" editor-type-id="text-editor">
439
+ <state relative-caret-position="15">
440
+ <caret line="1" column="18" selection-start-line="1" selection-start-column="18" selection-end-line="1" selection-end-column="18" />
441
+ </state>
442
+ </provider>
443
+ </entry>
444
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/baidu/client.rb">
445
+ <provider selected="true" editor-type-id="text-editor">
446
+ <state relative-caret-position="75">
447
+ <caret line="5" lean-forward="true" selection-start-line="5" selection-end-line="5" />
448
+ </state>
449
+ </provider>
450
+ </entry>
451
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/test_proxy_api/client.rb">
452
+ <provider selected="true" editor-type-id="text-editor">
453
+ <state>
454
+ <caret column="18" selection-start-column="7" selection-end-column="18" />
455
+ </state>
456
+ </provider>
457
+ </entry>
458
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/web/client.rb">
459
+ <provider selected="true" editor-type-id="text-editor">
460
+ <state relative-caret-position="90">
461
+ <caret line="6" column="3" lean-forward="true" selection-start-line="6" selection-start-column="3" selection-end-line="6" selection-end-column="3" />
462
+ </state>
463
+ </provider>
464
+ </entry>
465
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy.rb">
466
+ <provider selected="true" editor-type-id="text-editor">
467
+ <state relative-caret-position="30">
468
+ <caret line="2" column="4" selection-start-line="2" selection-end-line="2" selection-end-column="4" />
469
+ </state>
470
+ </provider>
471
+ </entry>
472
+ <entry file="file://$PROJECT_DIR$/lib/test.rb" />
473
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/test1.rb" />
474
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/test.rb" />
475
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/test2.rb" />
476
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/common.rb">
477
+ <provider selected="true" editor-type-id="text-editor">
478
+ <state relative-caret-position="45">
479
+ <caret line="3" lean-forward="true" selection-start-line="3" selection-end-line="3" />
480
+ </state>
481
+ </provider>
482
+ </entry>
483
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/proxy/client.rb">
484
+ <provider selected="true" editor-type-id="text-editor">
485
+ <state relative-caret-position="30">
486
+ <caret line="2" column="17" selection-start-line="2" selection-start-column="17" selection-end-line="2" selection-end-column="17" />
487
+ </state>
488
+ </provider>
489
+ </entry>
490
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler.rb">
491
+ <provider selected="true" editor-type-id="text-editor">
492
+ <state relative-caret-position="120">
493
+ <caret line="8" lean-forward="true" selection-start-line="8" selection-end-line="8" />
494
+ </state>
495
+ </provider>
496
+ </entry>
497
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/net/http/responses.rb">
498
+ <provider selected="true" editor-type-id="text-editor">
499
+ <state relative-caret-position="215">
500
+ <caret line="113" column="24" lean-forward="true" selection-start-line="113" selection-start-column="24" selection-end-line="113" selection-end-column="24" />
501
+ </state>
502
+ </provider>
503
+ </entry>
504
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/client.rb">
505
+ <provider selected="true" editor-type-id="text-editor">
506
+ <state relative-caret-position="-99">
507
+ <caret line="26" column="4" selection-start-line="26" selection-start-column="4" selection-end-line="26" selection-end-column="4" />
508
+ </state>
509
+ </provider>
510
+ </entry>
511
+ <entry file="file://$USER_HOME$/.rvm/rubies/ruby-2.4.1/lib/ruby/2.4.0/net/http.rb">
512
+ <provider selected="true" editor-type-id="text-editor">
513
+ <state relative-caret-position="15">
514
+ <caret line="1065" column="8" selection-start-line="1065" selection-start-column="8" selection-end-line="1065" selection-end-column="8" />
515
+ </state>
516
+ </provider>
517
+ </entry>
518
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/http.rb">
519
+ <provider selected="true" editor-type-id="text-editor">
520
+ <state relative-caret-position="278">
521
+ <caret line="165" column="41" selection-start-line="165" selection-start-column="41" selection-end-line="165" selection-end-column="41" />
522
+ </state>
523
+ </provider>
524
+ </entry>
525
+ <entry file="file://$PROJECT_DIR$/lib/http_crawler/version.rb">
526
+ <provider selected="true" editor-type-id="text-editor">
527
+ <state relative-caret-position="15">
528
+ <caret line="1" column="20" selection-start-line="1" selection-start-column="20" selection-end-line="1" selection-end-column="20" />
529
+ </state>
530
+ </provider>
531
+ </entry>
532
+ </component>
533
+ </project>
@@ -1,5 +1,5 @@
1
1
 
2
- require File.dirname(__FILE__) + '/web/client.rb'
2
+ load File.dirname(__FILE__) + '/web/client.rb'
3
3
 
4
4
  module HttpCrawler
5
5
  module Client
@@ -1,3 +1,3 @@
1
- require File.dirname(__FILE__) + '/http.rb'
2
- require File.dirname(__FILE__) + '/object.rb'
3
- require File.dirname(__FILE__) + '/string.rb'
1
+ load File.dirname(__FILE__) + '/http.rb'
2
+ load File.dirname(__FILE__) + '/object.rb'
3
+ load File.dirname(__FILE__) + '/string.rb'
@@ -1,5 +1,5 @@
1
- require File.dirname(__FILE__) + '/net/http.rb'
2
- require File.dirname(__FILE__) + '/net/response.rb'
1
+ load File.dirname(__FILE__) + '/net/http.rb'
2
+ load File.dirname(__FILE__) + '/net/response.rb'
3
3
 
4
4
  module HttpCrawler
5
5
  class HTTP < Net::HTTP
@@ -31,6 +31,7 @@ module HttpCrawler
31
31
  def proxy_api
32
32
  @proxy_api ||= "my"
33
33
  end
34
+
34
35
  @@proxy_list = []
35
36
  # 为 @http 重设代理
36
37
  def proxy(p = {})
@@ -127,6 +128,15 @@ module HttpCrawler
127
128
  server_error_sleep
128
129
  # 重新请求
129
130
  get_fetch(uri_or_path, initheader, dest, &block)
131
+ when Net::HTTPProxyAuthenticationRequired then
132
+ Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{@proxy_address}:#{@proxy_port}] =>#{address}"
133
+ if update_proxy?
134
+ server_error_sleep
135
+ # 重新请求
136
+ get_fetch(uri_or_path, initheader, dest, &block)
137
+ else
138
+ response.error!
139
+ end
130
140
  else
131
141
  server_error_sleep
132
142
  response.error!
@@ -137,7 +147,7 @@ module HttpCrawler
137
147
  def post_fetch(uri_or_path, data, initheader = nil, dest = nil, &block)
138
148
  # 更新uri_or_path 如果 uri_or_path 是 String类型 同时 又不是 ascii编码格式就进行转码
139
149
  uri_or_path = URI.encode(uri_or_path) if String === uri_or_path && CharDet.detect(uri_or_path)["encoding"] != "ascii"
140
- Rails.logger.debug "post_fetch => #{uri_or_path}"
150
+ # Rails.logger.debug "post_fetch => #{uri_or_path}"
141
151
  response = post(uri_or_path, data, initheader, dest, &block)
142
152
  case response
143
153
  when Net::HTTPSuccess then
@@ -152,6 +162,15 @@ module HttpCrawler
152
162
  server_error_sleep
153
163
  # 重新请求
154
164
  post_fetch(uri_or_path, initheader, dest, &block)
165
+ when Net::HTTPProxyAuthenticationRequired then
166
+ Rails.logger.warn "Net::HTTPProxyAuthenticationRequired 407 to proxy:[#{http.proxy_address}:#{http.proxy_port}] =>#{address}"
167
+ if update_proxy?
168
+ server_error_sleep
169
+ # 重新请求
170
+ post_fetch(uri_or_path, initheader, dest, &block)
171
+ else
172
+ response.error!
173
+ end
155
174
  else
156
175
  server_error_sleep
157
176
  response.error!
@@ -166,6 +185,7 @@ module HttpCrawler
166
185
  def request(req, body = nil, &block)
167
186
  begin
168
187
  Rails.logger.debug("#{req.class} => #{use_ssl? ? "https://" : "http://" }#{address}:#{port}#{req.path}") if started?
188
+ Rails.logger.debug("body => #{body}") if started? && body
169
189
  super(req, body, &block)
170
190
  rescue => error
171
191
  if started?
@@ -1,6 +1,6 @@
1
1
 
2
2
 
3
- require File.dirname(__FILE__) + '/proxy/client.rb'
3
+ load File.dirname(__FILE__) + '/proxy/client.rb'
4
4
 
5
5
  module HttpCrawler
6
6
  module Proxy
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.2.2.1"
2
+ VERSION = "0.2.2.3"
3
3
  end
data/lib/http_crawler.rb CHANGED
@@ -3,9 +3,9 @@ require 'json'
3
3
  require 'digest/md5'
4
4
  require 'nokogiri'
5
5
 
6
- require 'http_crawler/client'
7
- require 'http_crawler/proxy'
8
- require 'http_crawler/http'
6
+ load 'http_crawler/client.rb'
7
+ load 'http_crawler/proxy.rb'
8
+ load 'http_crawler/http.rb'
9
9
 
10
10
  module HttpCrawler
11
11
  # Your code goes here...
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2.1
4
+ version: 0.2.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-28 00:00:00.000000000 Z
11
+ date: 2018-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -116,7 +116,12 @@ extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
118
  - ".gitignore"
119
+ - ".idea/.rakeTasks"
120
+ - ".idea/http_crawler.iml"
121
+ - ".idea/misc.xml"
122
+ - ".idea/modules.xml"
119
123
  - ".idea/vcs.xml"
124
+ - ".idea/workspace.xml"
120
125
  - ".rspec"
121
126
  - CODE_OF_CONDUCT.md
122
127
  - Gemfile