crawlfish 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.idea/.name +1 -0
- data/.idea/.rakeTasks +7 -0
- data/.idea/crawlfish.iml +10 -0
- data/.idea/dictionaries/dan.xml +7 -0
- data/.idea/encodings.xml +5 -0
- data/.idea/misc.xml +8 -0
- data/.idea/modules.xml +9 -0
- data/.idea/vcs.xml +7 -0
- data/.idea/workspace.xml +355 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/crawlfish.gemspec +23 -0
- data/lib/crawlfish.rb +62 -0
- data/lib/crawlfish/version.rb +3 -0
- metadata +95 -0
data/.gitignore
ADDED
data/.idea/.name
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
crawlfish
|
data/.idea/.rakeTasks
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<Settings><!--This file was automatically generated by Ruby plugin.
|
3
|
+
You are allowed to:
|
4
|
+
1. Remove rake task
|
5
|
+
2. Add existing rake tasks
|
6
|
+
To add existing rake tasks automatically delete this file and reload the project.
|
7
|
+
--><RakeGroup description="" fullCmd="" taksId="rake"><RakeTask description="Build crawlfish-0.0.1.gem into the pkg directory" fullCmd="build" taksId="build" /><RakeTask description="Build and install crawlfish-0.0.1.gem into system gems" fullCmd="install" taksId="install" /><RakeTask description="Create tag v0.0.1 and build and push crawlfish-0.0.1.gem to Rubygems" fullCmd="release" taksId="release" /></RakeGroup></Settings>
|
data/.idea/crawlfish.iml
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="NewModuleRootManager">
|
4
|
+
<content url="file://$MODULE_DIR$" />
|
5
|
+
<orderEntry type="inheritedJdk" />
|
6
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
7
|
+
<orderEntry type="library" scope="PROVIDED" name="[gem] bundler (v1.0.10, C:/RailsInstaller/Ruby1.8.7/lib/ruby/gems/1.8/gems/bundler-1.0.10)" level="application" />
|
8
|
+
</component>
|
9
|
+
</module>
|
10
|
+
|
data/.idea/encodings.xml
ADDED
data/.idea/misc.xml
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="DependencyValidationManager">
|
4
|
+
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
|
5
|
+
</component>
|
6
|
+
<component name="ProjectRootManager" version="2" project-jdk-name="Ruby SDK 1.8.7-p330" project-jdk-type="RUBY_SDK" />
|
7
|
+
</project>
|
8
|
+
|
data/.idea/modules.xml
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="ProjectModuleManager">
|
4
|
+
<modules>
|
5
|
+
<module fileurl="file://$PROJECT_DIR$/.idea/crawlfish.iml" filepath="$PROJECT_DIR$/.idea/crawlfish.iml" />
|
6
|
+
</modules>
|
7
|
+
</component>
|
8
|
+
</project>
|
9
|
+
|
data/.idea/vcs.xml
ADDED
data/.idea/workspace.xml
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="ChangeListManager">
|
4
|
+
<list default="true" id="52814ec4-7729-4991-a410-d3f62df9d527" name="Default" comment="">
|
5
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.name" />
|
6
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.rakeTasks" />
|
7
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/crawlfish.iml" />
|
8
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.gitignore" />
|
9
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/dictionaries/dan.xml" />
|
10
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/encodings.xml" />
|
11
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/misc.xml" />
|
12
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/modules.xml" />
|
13
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/vcs.xml" />
|
14
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
15
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Gemfile" />
|
16
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Rakefile" />
|
17
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/crawlfish.gemspec" />
|
18
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish.rb" />
|
19
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish/version.rb" />
|
20
|
+
</list>
|
21
|
+
<ignored path="crawlfish.iws" />
|
22
|
+
<ignored path=".idea/workspace.xml" />
|
23
|
+
<option name="TRACKING_ENABLED" value="true" />
|
24
|
+
<option name="SHOW_DIALOG" value="false" />
|
25
|
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
26
|
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
27
|
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
28
|
+
</component>
|
29
|
+
<component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
|
30
|
+
<component name="CoverageDataManager" choice="3" />
|
31
|
+
<component name="CreatePatchCommitExecutor">
|
32
|
+
<option name="PATCH_PATH" value="" />
|
33
|
+
<option name="REVERSE_PATCH" value="false" />
|
34
|
+
</component>
|
35
|
+
<component name="DaemonCodeAnalyzer">
|
36
|
+
<disable_hints />
|
37
|
+
</component>
|
38
|
+
<component name="FavoritesManager">
|
39
|
+
<favorites_list name="crawlfish" />
|
40
|
+
</component>
|
41
|
+
<component name="FileEditorManager">
|
42
|
+
<leaf>
|
43
|
+
<file leaf-file-name="crawlfish.gemspec" pinned="false" current="false" current-in-tab="false">
|
44
|
+
<entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
|
45
|
+
<provider selected="true" editor-type-id="text-editor">
|
46
|
+
<state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
|
47
|
+
<folding />
|
48
|
+
</state>
|
49
|
+
</provider>
|
50
|
+
</entry>
|
51
|
+
</file>
|
52
|
+
<file leaf-file-name="version.rb" pinned="false" current="true" current-in-tab="true">
|
53
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
|
54
|
+
<provider selected="true" editor-type-id="text-editor">
|
55
|
+
<state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
|
56
|
+
<folding />
|
57
|
+
</state>
|
58
|
+
</provider>
|
59
|
+
</entry>
|
60
|
+
</file>
|
61
|
+
<file leaf-file-name="Gemfile" pinned="false" current="false" current-in-tab="false">
|
62
|
+
<entry file="file://$PROJECT_DIR$/Gemfile">
|
63
|
+
<provider selected="true" editor-type-id="text-editor">
|
64
|
+
<state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
|
65
|
+
<folding />
|
66
|
+
</state>
|
67
|
+
</provider>
|
68
|
+
</entry>
|
69
|
+
</file>
|
70
|
+
<file leaf-file-name="crawlfish.rb" pinned="false" current="false" current-in-tab="false">
|
71
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
|
72
|
+
<provider selected="true" editor-type-id="text-editor">
|
73
|
+
<state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
|
74
|
+
<folding />
|
75
|
+
</state>
|
76
|
+
</provider>
|
77
|
+
</entry>
|
78
|
+
</file>
|
79
|
+
</leaf>
|
80
|
+
</component>
|
81
|
+
<component name="FindManager">
|
82
|
+
<FindUsagesManager>
|
83
|
+
<setting name="OPEN_NEW_TAB" value="false" />
|
84
|
+
</FindUsagesManager>
|
85
|
+
</component>
|
86
|
+
<component name="Git.Settings">
|
87
|
+
<option name="CHECKOUT_INCLUDE_TAGS" value="false" />
|
88
|
+
<option name="UPDATE_CHANGES_POLICY" value="STASH" />
|
89
|
+
</component>
|
90
|
+
<component name="IdeDocumentHistory">
|
91
|
+
<option name="changedFiles">
|
92
|
+
<list>
|
93
|
+
<option value="$PROJECT_DIR$/crawlfish.gemspec" />
|
94
|
+
<option value="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
|
95
|
+
<option value="$PROJECT_DIR$/lib/crawlfish.rb" />
|
96
|
+
<option value="$PROJECT_DIR$/lib/crawlfish/version.rb" />
|
97
|
+
</list>
|
98
|
+
</option>
|
99
|
+
</component>
|
100
|
+
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
|
101
|
+
<OptionsSetting value="true" id="Add" />
|
102
|
+
<OptionsSetting value="true" id="Remove" />
|
103
|
+
<OptionsSetting value="true" id="Checkout" />
|
104
|
+
<OptionsSetting value="true" id="Update" />
|
105
|
+
<OptionsSetting value="true" id="Status" />
|
106
|
+
<OptionsSetting value="true" id="Edit" />
|
107
|
+
<ConfirmationsSetting value="2" id="Add" />
|
108
|
+
<ConfirmationsSetting value="0" id="Remove" />
|
109
|
+
</component>
|
110
|
+
<component name="ProjectReloadState">
|
111
|
+
<option name="STATE" value="0" />
|
112
|
+
</component>
|
113
|
+
<component name="ProjectView">
|
114
|
+
<navigator currentView="ProjectPane" proportions="" version="1" splitterProportion="0.5">
|
115
|
+
<flattenPackages />
|
116
|
+
<showMembers />
|
117
|
+
<showModules />
|
118
|
+
<showLibraryContents />
|
119
|
+
<hideEmptyPackages />
|
120
|
+
<abbreviatePackageNames />
|
121
|
+
<autoscrollToSource />
|
122
|
+
<autoscrollFromSource />
|
123
|
+
<sortByType />
|
124
|
+
</navigator>
|
125
|
+
<panes>
|
126
|
+
<pane id="Favorites" />
|
127
|
+
<pane id="ProjectPane">
|
128
|
+
<subPane>
|
129
|
+
<PATH>
|
130
|
+
<PATH_ELEMENT>
|
131
|
+
<option name="myItemId" value="crawlfish" />
|
132
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
133
|
+
</PATH_ELEMENT>
|
134
|
+
</PATH>
|
135
|
+
<PATH>
|
136
|
+
<PATH_ELEMENT>
|
137
|
+
<option name="myItemId" value="crawlfish" />
|
138
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
139
|
+
</PATH_ELEMENT>
|
140
|
+
<PATH_ELEMENT>
|
141
|
+
<option name="myItemId" value="crawlfish" />
|
142
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
143
|
+
</PATH_ELEMENT>
|
144
|
+
</PATH>
|
145
|
+
<PATH>
|
146
|
+
<PATH_ELEMENT>
|
147
|
+
<option name="myItemId" value="crawlfish" />
|
148
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
149
|
+
</PATH_ELEMENT>
|
150
|
+
<PATH_ELEMENT>
|
151
|
+
<option name="myItemId" value="crawlfish" />
|
152
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
153
|
+
</PATH_ELEMENT>
|
154
|
+
<PATH_ELEMENT>
|
155
|
+
<option name="myItemId" value="lib" />
|
156
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
157
|
+
</PATH_ELEMENT>
|
158
|
+
</PATH>
|
159
|
+
<PATH>
|
160
|
+
<PATH_ELEMENT>
|
161
|
+
<option name="myItemId" value="crawlfish" />
|
162
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
163
|
+
</PATH_ELEMENT>
|
164
|
+
<PATH_ELEMENT>
|
165
|
+
<option name="myItemId" value="crawlfish" />
|
166
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
167
|
+
</PATH_ELEMENT>
|
168
|
+
<PATH_ELEMENT>
|
169
|
+
<option name="myItemId" value="lib" />
|
170
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
171
|
+
</PATH_ELEMENT>
|
172
|
+
<PATH_ELEMENT>
|
173
|
+
<option name="myItemId" value="crawlfish" />
|
174
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
175
|
+
</PATH_ELEMENT>
|
176
|
+
</PATH>
|
177
|
+
</subPane>
|
178
|
+
</pane>
|
179
|
+
<pane id="Scope" />
|
180
|
+
</panes>
|
181
|
+
</component>
|
182
|
+
<component name="PropertiesComponent">
|
183
|
+
<property name="recentsLimit" value="5" />
|
184
|
+
</component>
|
185
|
+
<component name="RunManager" selected="Ruby.crawlfish">
|
186
|
+
<configuration default="false" name="crawlfish" type="RubyRunConfigurationType" factoryName="Ruby" temporary="true">
|
187
|
+
<module name="crawlfish" />
|
188
|
+
<RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
|
189
|
+
<RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="$PROJECT_DIR$/lib/crawlfish" />
|
190
|
+
<RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
|
191
|
+
<RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
|
192
|
+
<RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
|
193
|
+
<envs />
|
194
|
+
<EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
|
195
|
+
<EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
|
196
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
|
197
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
|
198
|
+
<RunnerSettings RunnerId="RubyRunner" />
|
199
|
+
<ConfigurationWrapper RunnerId="RubyRunner" />
|
200
|
+
<method />
|
201
|
+
</configuration>
|
202
|
+
<configuration default="true" type="RubyRunConfigurationType" factoryName="Ruby">
|
203
|
+
<module name="" />
|
204
|
+
<RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
|
205
|
+
<RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="" />
|
206
|
+
<RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
|
207
|
+
<RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
|
208
|
+
<RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
|
209
|
+
<envs />
|
210
|
+
<EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
|
211
|
+
<EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
|
212
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="" />
|
213
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
|
214
|
+
<method />
|
215
|
+
</configuration>
|
216
|
+
<configuration default="true" type="TestUnitRunConfigurationType" factoryName="Test::Unit/Shoulda">
|
217
|
+
<predefined_log_file id="RUBY_TESTUNIT" enabled="true" />
|
218
|
+
<module name="" />
|
219
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
|
220
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="WORK DIR" VALUE="" />
|
221
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="SHOULD_USE_SDK" VALUE="false" />
|
222
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="ALTERN_SDK_NAME" VALUE="" />
|
223
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="myPassParentEnvs" VALUE="true" />
|
224
|
+
<envs />
|
225
|
+
<EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
|
226
|
+
<EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
|
227
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TESTS_FOLDER_PATH" VALUE="" />
|
228
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_SCRIPT_PATH" VALUE="" />
|
229
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_CLASS_NAME" VALUE="" />
|
230
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_FILE_MASK" VALUE="" />
|
231
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_METHOD_NAME" VALUE="" />
|
232
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_TEST_TYPE" VALUE="TEST_SCRIPT" />
|
233
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="INHERITANCE_CHECK_DISABLED" VALUE="false" />
|
234
|
+
<method />
|
235
|
+
</configuration>
|
236
|
+
<list size="1">
|
237
|
+
<item index="0" class="java.lang.String" itemvalue="Ruby.crawlfish" />
|
238
|
+
</list>
|
239
|
+
</component>
|
240
|
+
<component name="ShelveChangesManager" show_recycled="false" />
|
241
|
+
<component name="SvnConfiguration" maxAnnotateRevisions="500">
|
242
|
+
<option name="USER" value="" />
|
243
|
+
<option name="PASSWORD" value="" />
|
244
|
+
<option name="LAST_MERGED_REVISION" />
|
245
|
+
<option name="MERGE_DRY_RUN" value="false" />
|
246
|
+
<option name="MERGE_DIFF_USE_ANCESTRY" value="true" />
|
247
|
+
<option name="UPDATE_LOCK_ON_DEMAND" value="false" />
|
248
|
+
<option name="IGNORE_SPACES_IN_MERGE" value="false" />
|
249
|
+
<option name="DETECT_NESTED_COPIES" value="true" />
|
250
|
+
<option name="CHECK_NESTED_FOR_QUICK_MERGE" value="false" />
|
251
|
+
<option name="IGNORE_SPACES_IN_ANNOTATE" value="true" />
|
252
|
+
<option name="SHOW_MERGE_SOURCES_IN_ANNOTATE" value="true" />
|
253
|
+
<option name="FORCE_UPDATE" value="false" />
|
254
|
+
<configuration useDefault="true">C:\Users\dan\AppData\Roaming\Subversion</configuration>
|
255
|
+
<myIsUseDefaultProxy>false</myIsUseDefaultProxy>
|
256
|
+
</component>
|
257
|
+
<component name="TaskManager">
|
258
|
+
<task active="true" id="Default" summary="Default task">
|
259
|
+
<created>1298757413423</created>
|
260
|
+
<updated>1298757413423</updated>
|
261
|
+
</task>
|
262
|
+
<servers />
|
263
|
+
</component>
|
264
|
+
<component name="ToolWindowManager">
|
265
|
+
<frame x="-8" y="-8" width="1696" height="1026" extended-state="6" />
|
266
|
+
<editor active="true" />
|
267
|
+
<layout>
|
268
|
+
<window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
269
|
+
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
|
270
|
+
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="true" content_ui="tabs" />
|
271
|
+
<window_info id="Dependency Viewer" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
272
|
+
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.12062726" sideWeight="0.67001116" order="0" side_tool="false" content_ui="tabs" />
|
273
|
+
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
274
|
+
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32998884" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
275
|
+
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
276
|
+
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
|
277
|
+
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
278
|
+
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
279
|
+
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
280
|
+
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
281
|
+
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
|
282
|
+
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
|
283
|
+
</layout>
|
284
|
+
</component>
|
285
|
+
<component name="VcsManagerConfiguration">
|
286
|
+
<option name="OFFER_MOVE_TO_ANOTHER_CHANGELIST_ON_PARTIAL_COMMIT" value="true" />
|
287
|
+
<option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="true" />
|
288
|
+
<option name="PERFORM_UPDATE_IN_BACKGROUND" value="true" />
|
289
|
+
<option name="PERFORM_COMMIT_IN_BACKGROUND" value="true" />
|
290
|
+
<option name="PERFORM_EDIT_IN_BACKGROUND" value="true" />
|
291
|
+
<option name="PERFORM_CHECKOUT_IN_BACKGROUND" value="true" />
|
292
|
+
<option name="PERFORM_ADD_REMOVE_IN_BACKGROUND" value="true" />
|
293
|
+
<option name="PERFORM_ROLLBACK_IN_BACKGROUND" value="false" />
|
294
|
+
<option name="CHECK_LOCALLY_CHANGED_CONFLICTS_IN_BACKGROUND" value="false" />
|
295
|
+
<option name="ENABLE_BACKGROUND_PROCESSES" value="false" />
|
296
|
+
<option name="CHANGED_ON_SERVER_INTERVAL" value="60" />
|
297
|
+
<option name="SHOW_ONLY_CHANGED_IN_SELECTION_DIFF" value="true" />
|
298
|
+
<option name="CHECK_COMMIT_MESSAGE_SPELLING" value="true" />
|
299
|
+
<option name="FORCE_NON_EMPTY_COMMENT" value="false" />
|
300
|
+
<option name="LAST_COMMIT_MESSAGE" />
|
301
|
+
<option name="MAKE_NEW_CHANGELIST_ACTIVE" value="true" />
|
302
|
+
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false" />
|
303
|
+
<option name="CHECK_FILES_UP_TO_DATE_BEFORE_COMMIT" value="false" />
|
304
|
+
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false" />
|
305
|
+
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false" />
|
306
|
+
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8" />
|
307
|
+
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5" />
|
308
|
+
<option name="ACTIVE_VCS_NAME" />
|
309
|
+
<option name="UPDATE_GROUP_BY_PACKAGES" value="false" />
|
310
|
+
<option name="UPDATE_GROUP_BY_CHANGELIST" value="false" />
|
311
|
+
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false" />
|
312
|
+
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6" />
|
313
|
+
</component>
|
314
|
+
<component name="XDebuggerManager">
|
315
|
+
<breakpoint-manager />
|
316
|
+
</component>
|
317
|
+
<component name="editorHistoryManager">
|
318
|
+
<entry file="file://$PROJECT_DIR$/Rakefile">
|
319
|
+
<provider selected="true" editor-type-id="text-editor">
|
320
|
+
<state line="0" column="0" selection-start="0" selection-end="0" vertical-scroll-proportion="0.0">
|
321
|
+
<folding />
|
322
|
+
</state>
|
323
|
+
</provider>
|
324
|
+
</entry>
|
325
|
+
<entry file="file://$PROJECT_DIR$/Gemfile">
|
326
|
+
<provider selected="true" editor-type-id="text-editor">
|
327
|
+
<state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
|
328
|
+
<folding />
|
329
|
+
</state>
|
330
|
+
</provider>
|
331
|
+
</entry>
|
332
|
+
<entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
|
333
|
+
<provider selected="true" editor-type-id="text-editor">
|
334
|
+
<state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
|
335
|
+
<folding />
|
336
|
+
</state>
|
337
|
+
</provider>
|
338
|
+
</entry>
|
339
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
|
340
|
+
<provider selected="true" editor-type-id="text-editor">
|
341
|
+
<state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
|
342
|
+
<folding />
|
343
|
+
</state>
|
344
|
+
</provider>
|
345
|
+
</entry>
|
346
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
|
347
|
+
<provider selected="true" editor-type-id="text-editor">
|
348
|
+
<state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
|
349
|
+
<folding />
|
350
|
+
</state>
|
351
|
+
</provider>
|
352
|
+
</entry>
|
353
|
+
</component>
|
354
|
+
</project>
|
355
|
+
|
data/Gemfile
ADDED
data/Rakefile
ADDED
data/crawlfish.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "crawlfish/version"
|
4
|
+
|
5
|
+
# Gem specification for crawlfish. The packaged file lists are taken from the
# files git tracks, so this must be evaluated from inside the git checkout.
Gem::Specification.new do |s|
  s.name        = "crawlfish"
  s.version     = Crawlfish::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Dan Neumann"]
  s.email       = ["danneumanntx@gmail.com"]
  s.homepage    = "http://danneu.com"
  s.summary     = %q{Crawlfish the Search Engine Crawler}
  s.description = %q{Crawls and scrapes search engine results pages.}

  s.rubyforge_project = "crawlfish"

  s.add_dependency "nokogiri"

  # The IDE project directory (.idea/) is tracked by git, but it holds only
  # editor state (including machine-specific paths), so keep it out of the gem.
  s.files         = `git ls-files`.split("\n").reject { |f| f =~ %r{\A\.idea/} }
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/lib/crawlfish.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'cgi'
require 'nokogiri'
require 'open-uri'
|
3
|
+
|
4
|
+
module Crawlfish
  # Finds the rank of a website in Google's result pages for a keyword.
  #
  # Example:
  #   scraper = Crawlfish::GoogleScraper.new(:website => "example.com",
  #                                          :keyword => "red shoes")
  #   scraper.scrape
  #   # => {:position => 4, :measured_at => <Time>, :engine => "Google"}
  class GoogleScraper
    # Last result page that is fetched before the search gives up.
    MAX_PAGES = 10
    # Pause between two page requests, in seconds (Kernel#sleep takes seconds).
    PAGE_DELAY = 2

    attr_accessor :website, :keyword, :user_agent, :start, :i, :position

    # options - Hash with the keys:
    #   :website - host name or URL of the site whose rank is wanted
    #   :keyword - search phrase; words are separated by whitespace
    def initialize(options)
      @website = options[:website]
      @keyword = options[:keyword]
      @user_agent = "Windows IE 6"
      @i = 1 # rank of the next result to be inspected (1-100)
      @position = nil
      @page = Page.new
    end

    # Walks the result pages until the website is found, a page comes back
    # without results, or MAX_PAGES pages have been inspected.
    #
    # Returns a Hash with :position (rank of the first matching result, or -1
    # when the site was not found), :measured_at (Time) and :engine ("Google").
    def scrape
      until @position or @page.current_page > MAX_PAGES
        build_query
        search_this_page
        next_page
        # Pause only when another request is actually going to follow.
        sleep PAGE_DELAY unless @position or @page.current_page > MAX_PAGES
      end
      @position ||= -1 # -1 if not found
      {:position => @position, :measured_at => Time.now, :engine => "Google"}
    end

    # scrape helpers

    # Builds, stores in @url and returns the search URL for the current page.
    # Each word is escaped so characters such as "&" or "#" cannot break the
    # query string; words are joined with "+".
    def build_query
      keyword = @keyword.split.map { |word| CGI.escape(word) }.join("+")
      @url = "http://www.google.com/search?q=#{keyword}&start=#{@page.start_number}"
    end

    # Fetches the current result page and compares the host of every result
    # link with the host of @website. On the first match @position is set to
    # that result's rank; @i keeps counting across pages. A page without any
    # result links sets @position to -1, which ends the search.
    def search_this_page
      # URI#open comes from open-uri and also works on Ruby versions where
      # Kernel#open no longer accepts URLs.
      doc = Nokogiri::HTML(URI.parse(@url).open("User-Agent" => user_agent))
      links = doc.xpath('//h3/a[contains(@class, "l")]')
      # If links are empty, position is not found and the search ends
      if links.empty?
        @position = -1
        return
      end

      target = host_of(@website)
      links.each do |link|
        if target && host_of(link['href']) == target
          @position = @i
          return
        end
        @i += 1
      end
    end

    # Advances to the next result page.
    def next_page
      @page.next_page
    end

    private

    # Returns the lower-cased host of the first http(s) URL found in +text+,
    # without a leading "www."; a bare host name such as "example.com" is
    # accepted too. Returns nil when no host can be determined.
    def host_of(text)
      text = text.to_s.strip
      return nil if text.empty?
      url = text[%r{https?://[^\s"'<>]+}i] || "http://#{text}"
      host = URI.parse(url).host
      return nil if host.nil? || host.empty?
      host.downcase.sub(/\Awww\./, "")
    rescue URI::InvalidURIError
      nil
    end
  end

  # Tracks which result page is being inspected (1-based).
  class Page
    # Number of results the query URL advances by per page.
    RESULTS_PER_PAGE = 10

    attr_accessor :current_page

    def initialize
      @current_page = 1
    end

    # Converts current_page into the "start" offset used in the query URL
    # (page 1 => 0, page 2 => 10, ...).
    def start_number
      (@current_page - 1) * RESULTS_PER_PAGE
    end

    # Moves on to the following page and returns the new page number.
    def next_page
      @current_page += 1
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: crawlfish
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Dan Neumann
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-02-26 00:00:00 -06:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: nokogiri
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Crawls and scrapes search engine results pages.
|
36
|
+
email:
|
37
|
+
- danneumanntx@gmail.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- .idea/.name
|
47
|
+
- .idea/.rakeTasks
|
48
|
+
- .idea/crawlfish.iml
|
49
|
+
- .idea/dictionaries/dan.xml
|
50
|
+
- .idea/encodings.xml
|
51
|
+
- .idea/misc.xml
|
52
|
+
- .idea/modules.xml
|
53
|
+
- .idea/vcs.xml
|
54
|
+
- .idea/workspace.xml
|
55
|
+
- Gemfile
|
56
|
+
- Rakefile
|
57
|
+
- crawlfish.gemspec
|
58
|
+
- lib/crawlfish.rb
|
59
|
+
- lib/crawlfish/version.rb
|
60
|
+
has_rdoc: true
|
61
|
+
homepage: http://danneu.com
|
62
|
+
licenses: []
|
63
|
+
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
none: false
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
hash: 3
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
none: false
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
hash: 3
|
84
|
+
segments:
|
85
|
+
- 0
|
86
|
+
version: "0"
|
87
|
+
requirements: []
|
88
|
+
|
89
|
+
rubyforge_project: crawlfish
|
90
|
+
rubygems_version: 1.3.7
|
91
|
+
signing_key:
|
92
|
+
specification_version: 3
|
93
|
+
summary: Crawlfish the Search Engine Crawler
|
94
|
+
test_files: []
|
95
|
+
|