crawlfish 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
@@ -0,0 +1 @@
1
+ crawlfish
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <Settings><!--This file was automatically generated by Ruby plugin.
3
+ You are allowed to:
4
+ 1. Remove rake task
5
+ 2. Add existing rake tasks
6
+ To add existing rake tasks automatically delete this file and reload the project.
7
+ --><RakeGroup description="" fullCmd="" taksId="rake"><RakeTask description="Build crawlfish-0.0.1.gem into the pkg directory" fullCmd="build" taksId="build" /><RakeTask description="Build and install crawlfish-0.0.1.gem into system gems" fullCmd="install" taksId="install" /><RakeTask description="Create tag v0.0.1 and build and push crawlfish-0.0.1.gem to Rubygems" fullCmd="release" taksId="release" /></RakeGroup></Settings>
@@ -0,0 +1,10 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ <orderEntry type="library" scope="PROVIDED" name="[gem] bundler (v1.0.10, C:/RailsInstaller/Ruby1.8.7/lib/ruby/gems/1.8/gems/bundler-1.0.10)" level="application" />
8
+ </component>
9
+ </module>
10
+
@@ -0,0 +1,7 @@
1
+ <component name="ProjectDictionaryState">
2
+ <dictionary name="dan">
3
+ <words>
4
+ <w>crawlfish</w>
5
+ </words>
6
+ </dictionary>
7
+ </component>
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
4
+ </project>
5
+
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="DependencyValidationManager">
4
+ <option name="SKIP_IMPORT_STATEMENTS" value="false" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Ruby SDK 1.8.7-p330" project-jdk-type="RUBY_SDK" />
7
+ </project>
8
+
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/crawlfish.iml" filepath="$PROJECT_DIR$/.idea/crawlfish.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
9
+
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
7
+
@@ -0,0 +1,355 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="52814ec4-7729-4991-a410-d3f62df9d527" name="Default" comment="">
5
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.name" />
6
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.rakeTasks" />
7
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/crawlfish.iml" />
8
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.gitignore" />
9
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/dictionaries/dan.xml" />
10
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/encodings.xml" />
11
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/misc.xml" />
12
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/modules.xml" />
13
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/vcs.xml" />
14
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
15
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Gemfile" />
16
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Rakefile" />
17
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/crawlfish.gemspec" />
18
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish.rb" />
19
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish/version.rb" />
20
+ </list>
21
+ <ignored path="crawlfish.iws" />
22
+ <ignored path=".idea/workspace.xml" />
23
+ <option name="TRACKING_ENABLED" value="true" />
24
+ <option name="SHOW_DIALOG" value="false" />
25
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
26
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
27
+ <option name="LAST_RESOLUTION" value="IGNORE" />
28
+ </component>
29
+ <component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
30
+ <component name="CoverageDataManager" choice="3" />
31
+ <component name="CreatePatchCommitExecutor">
32
+ <option name="PATCH_PATH" value="" />
33
+ <option name="REVERSE_PATCH" value="false" />
34
+ </component>
35
+ <component name="DaemonCodeAnalyzer">
36
+ <disable_hints />
37
+ </component>
38
+ <component name="FavoritesManager">
39
+ <favorites_list name="crawlfish" />
40
+ </component>
41
+ <component name="FileEditorManager">
42
+ <leaf>
43
+ <file leaf-file-name="crawlfish.gemspec" pinned="false" current="false" current-in-tab="false">
44
+ <entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
45
+ <provider selected="true" editor-type-id="text-editor">
46
+ <state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
47
+ <folding />
48
+ </state>
49
+ </provider>
50
+ </entry>
51
+ </file>
52
+ <file leaf-file-name="version.rb" pinned="false" current="true" current-in-tab="true">
53
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
54
+ <provider selected="true" editor-type-id="text-editor">
55
+ <state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
56
+ <folding />
57
+ </state>
58
+ </provider>
59
+ </entry>
60
+ </file>
61
+ <file leaf-file-name="Gemfile" pinned="false" current="false" current-in-tab="false">
62
+ <entry file="file://$PROJECT_DIR$/Gemfile">
63
+ <provider selected="true" editor-type-id="text-editor">
64
+ <state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
65
+ <folding />
66
+ </state>
67
+ </provider>
68
+ </entry>
69
+ </file>
70
+ <file leaf-file-name="crawlfish.rb" pinned="false" current="false" current-in-tab="false">
71
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
72
+ <provider selected="true" editor-type-id="text-editor">
73
+ <state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
74
+ <folding />
75
+ </state>
76
+ </provider>
77
+ </entry>
78
+ </file>
79
+ </leaf>
80
+ </component>
81
+ <component name="FindManager">
82
+ <FindUsagesManager>
83
+ <setting name="OPEN_NEW_TAB" value="false" />
84
+ </FindUsagesManager>
85
+ </component>
86
+ <component name="Git.Settings">
87
+ <option name="CHECKOUT_INCLUDE_TAGS" value="false" />
88
+ <option name="UPDATE_CHANGES_POLICY" value="STASH" />
89
+ </component>
90
+ <component name="IdeDocumentHistory">
91
+ <option name="changedFiles">
92
+ <list>
93
+ <option value="$PROJECT_DIR$/crawlfish.gemspec" />
94
+ <option value="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
95
+ <option value="$PROJECT_DIR$/lib/crawlfish.rb" />
96
+ <option value="$PROJECT_DIR$/lib/crawlfish/version.rb" />
97
+ </list>
98
+ </option>
99
+ </component>
100
+ <component name="ProjectLevelVcsManager" settingsEditedManually="false">
101
+ <OptionsSetting value="true" id="Add" />
102
+ <OptionsSetting value="true" id="Remove" />
103
+ <OptionsSetting value="true" id="Checkout" />
104
+ <OptionsSetting value="true" id="Update" />
105
+ <OptionsSetting value="true" id="Status" />
106
+ <OptionsSetting value="true" id="Edit" />
107
+ <ConfirmationsSetting value="2" id="Add" />
108
+ <ConfirmationsSetting value="0" id="Remove" />
109
+ </component>
110
+ <component name="ProjectReloadState">
111
+ <option name="STATE" value="0" />
112
+ </component>
113
+ <component name="ProjectView">
114
+ <navigator currentView="ProjectPane" proportions="" version="1" splitterProportion="0.5">
115
+ <flattenPackages />
116
+ <showMembers />
117
+ <showModules />
118
+ <showLibraryContents />
119
+ <hideEmptyPackages />
120
+ <abbreviatePackageNames />
121
+ <autoscrollToSource />
122
+ <autoscrollFromSource />
123
+ <sortByType />
124
+ </navigator>
125
+ <panes>
126
+ <pane id="Favorites" />
127
+ <pane id="ProjectPane">
128
+ <subPane>
129
+ <PATH>
130
+ <PATH_ELEMENT>
131
+ <option name="myItemId" value="crawlfish" />
132
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
133
+ </PATH_ELEMENT>
134
+ </PATH>
135
+ <PATH>
136
+ <PATH_ELEMENT>
137
+ <option name="myItemId" value="crawlfish" />
138
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
139
+ </PATH_ELEMENT>
140
+ <PATH_ELEMENT>
141
+ <option name="myItemId" value="crawlfish" />
142
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
143
+ </PATH_ELEMENT>
144
+ </PATH>
145
+ <PATH>
146
+ <PATH_ELEMENT>
147
+ <option name="myItemId" value="crawlfish" />
148
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
149
+ </PATH_ELEMENT>
150
+ <PATH_ELEMENT>
151
+ <option name="myItemId" value="crawlfish" />
152
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
153
+ </PATH_ELEMENT>
154
+ <PATH_ELEMENT>
155
+ <option name="myItemId" value="lib" />
156
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
157
+ </PATH_ELEMENT>
158
+ </PATH>
159
+ <PATH>
160
+ <PATH_ELEMENT>
161
+ <option name="myItemId" value="crawlfish" />
162
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
163
+ </PATH_ELEMENT>
164
+ <PATH_ELEMENT>
165
+ <option name="myItemId" value="crawlfish" />
166
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
167
+ </PATH_ELEMENT>
168
+ <PATH_ELEMENT>
169
+ <option name="myItemId" value="lib" />
170
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
171
+ </PATH_ELEMENT>
172
+ <PATH_ELEMENT>
173
+ <option name="myItemId" value="crawlfish" />
174
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
175
+ </PATH_ELEMENT>
176
+ </PATH>
177
+ </subPane>
178
+ </pane>
179
+ <pane id="Scope" />
180
+ </panes>
181
+ </component>
182
+ <component name="PropertiesComponent">
183
+ <property name="recentsLimit" value="5" />
184
+ </component>
185
+ <component name="RunManager" selected="Ruby.crawlfish">
186
+ <configuration default="false" name="crawlfish" type="RubyRunConfigurationType" factoryName="Ruby" temporary="true">
187
+ <module name="crawlfish" />
188
+ <RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
189
+ <RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="$PROJECT_DIR$/lib/crawlfish" />
190
+ <RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
191
+ <RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
192
+ <RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
193
+ <envs />
194
+ <EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
195
+ <EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
196
+ <RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
197
+ <RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
198
+ <RunnerSettings RunnerId="RubyRunner" />
199
+ <ConfigurationWrapper RunnerId="RubyRunner" />
200
+ <method />
201
+ </configuration>
202
+ <configuration default="true" type="RubyRunConfigurationType" factoryName="Ruby">
203
+ <module name="" />
204
+ <RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
205
+ <RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="" />
206
+ <RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
207
+ <RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
208
+ <RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
209
+ <envs />
210
+ <EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
211
+ <EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
212
+ <RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="" />
213
+ <RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
214
+ <method />
215
+ </configuration>
216
+ <configuration default="true" type="TestUnitRunConfigurationType" factoryName="Test::Unit/Shoulda">
217
+ <predefined_log_file id="RUBY_TESTUNIT" enabled="true" />
218
+ <module name="" />
219
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
220
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="WORK DIR" VALUE="" />
221
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="SHOULD_USE_SDK" VALUE="false" />
222
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="ALTERN_SDK_NAME" VALUE="" />
223
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="myPassParentEnvs" VALUE="true" />
224
+ <envs />
225
+ <EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
226
+ <EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
227
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TESTS_FOLDER_PATH" VALUE="" />
228
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_SCRIPT_PATH" VALUE="" />
229
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_CLASS_NAME" VALUE="" />
230
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_FILE_MASK" VALUE="" />
231
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_METHOD_NAME" VALUE="" />
232
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_TEST_TYPE" VALUE="TEST_SCRIPT" />
233
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="INHERITANCE_CHECK_DISABLED" VALUE="false" />
234
+ <method />
235
+ </configuration>
236
+ <list size="1">
237
+ <item index="0" class="java.lang.String" itemvalue="Ruby.crawlfish" />
238
+ </list>
239
+ </component>
240
+ <component name="ShelveChangesManager" show_recycled="false" />
241
+ <component name="SvnConfiguration" maxAnnotateRevisions="500">
242
+ <option name="USER" value="" />
243
+ <option name="PASSWORD" value="" />
244
+ <option name="LAST_MERGED_REVISION" />
245
+ <option name="MERGE_DRY_RUN" value="false" />
246
+ <option name="MERGE_DIFF_USE_ANCESTRY" value="true" />
247
+ <option name="UPDATE_LOCK_ON_DEMAND" value="false" />
248
+ <option name="IGNORE_SPACES_IN_MERGE" value="false" />
249
+ <option name="DETECT_NESTED_COPIES" value="true" />
250
+ <option name="CHECK_NESTED_FOR_QUICK_MERGE" value="false" />
251
+ <option name="IGNORE_SPACES_IN_ANNOTATE" value="true" />
252
+ <option name="SHOW_MERGE_SOURCES_IN_ANNOTATE" value="true" />
253
+ <option name="FORCE_UPDATE" value="false" />
254
+ <configuration useDefault="true">C:\Users\dan\AppData\Roaming\Subversion</configuration>
255
+ <myIsUseDefaultProxy>false</myIsUseDefaultProxy>
256
+ </component>
257
+ <component name="TaskManager">
258
+ <task active="true" id="Default" summary="Default task">
259
+ <created>1298757413423</created>
260
+ <updated>1298757413423</updated>
261
+ </task>
262
+ <servers />
263
+ </component>
264
+ <component name="ToolWindowManager">
265
+ <frame x="-8" y="-8" width="1696" height="1026" extended-state="6" />
266
+ <editor active="true" />
267
+ <layout>
268
+ <window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
269
+ <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
270
+ <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="true" content_ui="tabs" />
271
+ <window_info id="Dependency Viewer" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
272
+ <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.12062726" sideWeight="0.67001116" order="0" side_tool="false" content_ui="tabs" />
273
+ <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
274
+ <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32998884" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
275
+ <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
276
+ <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
277
+ <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
278
+ <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
279
+ <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
280
+ <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
281
+ <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
282
+ <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
283
+ </layout>
284
+ </component>
285
+ <component name="VcsManagerConfiguration">
286
+ <option name="OFFER_MOVE_TO_ANOTHER_CHANGELIST_ON_PARTIAL_COMMIT" value="true" />
287
+ <option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="true" />
288
+ <option name="PERFORM_UPDATE_IN_BACKGROUND" value="true" />
289
+ <option name="PERFORM_COMMIT_IN_BACKGROUND" value="true" />
290
+ <option name="PERFORM_EDIT_IN_BACKGROUND" value="true" />
291
+ <option name="PERFORM_CHECKOUT_IN_BACKGROUND" value="true" />
292
+ <option name="PERFORM_ADD_REMOVE_IN_BACKGROUND" value="true" />
293
+ <option name="PERFORM_ROLLBACK_IN_BACKGROUND" value="false" />
294
+ <option name="CHECK_LOCALLY_CHANGED_CONFLICTS_IN_BACKGROUND" value="false" />
295
+ <option name="ENABLE_BACKGROUND_PROCESSES" value="false" />
296
+ <option name="CHANGED_ON_SERVER_INTERVAL" value="60" />
297
+ <option name="SHOW_ONLY_CHANGED_IN_SELECTION_DIFF" value="true" />
298
+ <option name="CHECK_COMMIT_MESSAGE_SPELLING" value="true" />
299
+ <option name="FORCE_NON_EMPTY_COMMENT" value="false" />
300
+ <option name="LAST_COMMIT_MESSAGE" />
301
+ <option name="MAKE_NEW_CHANGELIST_ACTIVE" value="true" />
302
+ <option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false" />
303
+ <option name="CHECK_FILES_UP_TO_DATE_BEFORE_COMMIT" value="false" />
304
+ <option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false" />
305
+ <option name="REFORMAT_BEFORE_FILE_COMMIT" value="false" />
306
+ <option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8" />
307
+ <option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5" />
308
+ <option name="ACTIVE_VCS_NAME" />
309
+ <option name="UPDATE_GROUP_BY_PACKAGES" value="false" />
310
+ <option name="UPDATE_GROUP_BY_CHANGELIST" value="false" />
311
+ <option name="SHOW_FILE_HISTORY_AS_TREE" value="false" />
312
+ <option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6" />
313
+ </component>
314
+ <component name="XDebuggerManager">
315
+ <breakpoint-manager />
316
+ </component>
317
+ <component name="editorHistoryManager">
318
+ <entry file="file://$PROJECT_DIR$/Rakefile">
319
+ <provider selected="true" editor-type-id="text-editor">
320
+ <state line="0" column="0" selection-start="0" selection-end="0" vertical-scroll-proportion="0.0">
321
+ <folding />
322
+ </state>
323
+ </provider>
324
+ </entry>
325
+ <entry file="file://$PROJECT_DIR$/Gemfile">
326
+ <provider selected="true" editor-type-id="text-editor">
327
+ <state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
328
+ <folding />
329
+ </state>
330
+ </provider>
331
+ </entry>
332
+ <entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
333
+ <provider selected="true" editor-type-id="text-editor">
334
+ <state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
335
+ <folding />
336
+ </state>
337
+ </provider>
338
+ </entry>
339
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
340
+ <provider selected="true" editor-type-id="text-editor">
341
+ <state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
342
+ <folding />
343
+ </state>
344
+ </provider>
345
+ </entry>
346
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
347
+ <provider selected="true" editor-type-id="text-editor">
348
+ <state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
349
+ <folding />
350
+ </state>
351
+ </provider>
352
+ </entry>
353
+ </component>
354
+ </project>
355
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so downloads are encrypted and the server is
# authenticated; plain HTTP allows packages to be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in crawlfish.gemspec
gemspec
@@ -0,0 +1,2 @@
1
# Rake entry point for the gem: load Bundler, then let its gem helper
# register the packaging tasks for this project.
require 'bundler'

Bundler::GemHelper.install_tasks
@@ -0,0 +1,23 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for crawlfish.
$:.push File.expand_path("../lib", __FILE__)
require "crawlfish/version"

Gem::Specification.new do |s|
  s.name        = "crawlfish"
  s.version     = Crawlfish::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Dan Neumann"]
  s.email       = ["danneumanntx@gmail.com"]
  s.homepage    = "http://danneu.com"
  s.summary     = %q{Crawlfish the Search Engine Crawler}
  s.description = %q{Crawls and scrapes search engine results pages.}

  s.rubyforge_project = "crawlfish"

  s.add_dependency "nokogiri"

  # Package every tracked file except editor/IDE project state: the .idea/
  # directory holds machine-specific settings that do not belong in a
  # published gem.
  s.files         = `git ls-files`.split("\n").reject { |path| path =~ %r{\A\.idea/} }
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,62 @@
1
require 'cgi'
require 'open-uri'
require 'uri'

require 'nokogiri'

4
module Crawlfish
  # Looks up the rank of a website in Google's results for a keyword.
  #
  # Options accepted by #initialize:
  #   :website    - site to look for; a full URL or a bare host name
  #   :keyword    - search phrase
  #   :user_agent - optional User-Agent header (defaults to "Windows IE 6")
  #   :delay      - optional pause in seconds between result pages (default 2)
  #
  # #scrape returns a hash with :position (1-based rank, or -1 when the site
  # was not found), :measured_at and :engine.
  class GoogleScraper
    # Highest result page that is inspected before giving up.
    MAX_PAGES = 10
    # Default pause between page fetches, in seconds.
    DEFAULT_DELAY = 2
    DEFAULT_USER_AGENT = "Windows IE 6"
    # First absolute http(s) URL inside a string. Stops at "&" so that a
    # redirect-style href such as "/url?q=http://host/path&sa=U" yields only
    # the target URL.
    ABSOLUTE_URL = %r{https?://[^\s"'<>&]+}i

    attr_accessor :website, :keyword, :user_agent, :start, :i, :position

    def initialize(options)
      @website    = options[:website]
      @keyword    = options[:keyword]
      @user_agent = options[:user_agent] || DEFAULT_USER_AGENT
      @delay      = options[:delay] || DEFAULT_DELAY
      @i          = 1 # rank of the next result to be examined (1-100)
      @position   = nil
      @page       = Page.new
    end

    # Walks result pages until the site is found, a page has no results, or
    # MAX_PAGES pages have been examined.
    def scrape
      until @position || @page.current_page > MAX_PAGES
        build_query
        search_this_page
        break if @position
        next_page
        # Kernel#sleep takes seconds (not milliseconds); only pause when
        # another page is actually going to be requested.
        sleep @delay unless @page.current_page > MAX_PAGES
      end
      @position ||= -1 # -1 if not found
      {:position => @position, :measured_at => Time.now, :engine => "Google"}
    end

    # scrape helpers

    # Builds (and returns) the search URL for the current page. Whitespace
    # runs in the keyword collapse to a single "+", and reserved characters
    # are percent-encoded so they cannot corrupt the query string.
    def build_query
      keyword = CGI.escape(@keyword.to_s.split.join(" "))
      @url = "http://www.google.com/search?q=#{keyword}&start=#{@page.start_number}"
    end

    # Fetches the current results page and records the site's rank if one of
    # the result links points at it.
    def search_this_page
      doc = fetch_document
      links = doc.xpath('//h3/a[contains(@class, "l")]')
      # If links are empty, position is not found and the search ends
      if links.empty?
        @position = -1
        return
      end

      record_rank(links.map { |link| link['href'] })
    end

    def next_page
      @page.next_page
    end

    private

    # Downloads @url and parses it, closing the handle afterwards.
    # URI.open is used when open-uri provides it (newer Rubies no longer
    # fetch URLs through Kernel#open); older Rubies fall back to Kernel#open.
    def fetch_document
      headers = {"User-Agent" => user_agent}
      parse = lambda { |io| Nokogiri::HTML(io) }
      if URI.respond_to?(:open)
        URI.open(@url, headers, &parse)
      else
        open(@url, headers, &parse)
      end
    end

    # Compares each href, in order, against the target site. Sets @position
    # to the running rank of the first match; otherwise advances @i past
    # every href so ranks continue across pages. Returns @position.
    def record_rank(hrefs)
      wanted = target_domain(@website)
      hrefs.each do |href|
        found = canonical(host_from_url(href))
        if wanted && found == wanted
          @position = @i
          break
        end
        @i += 1
      end
      @position
    end

    # Host to look for, derived from the :website option. Accepts either a
    # full URL or a bare host such as "example.com/path".
    def target_domain(website)
      site = website.to_s.strip
      return nil if site.empty?
      canonical(host_from_url(site) || site.split("/").first)
    end

    # Host of the first absolute http(s) URL found in +text+, or nil when
    # there is none or it cannot be parsed.
    def host_from_url(text)
      url = CGI.unescape(text.to_s)[ABSOLUTE_URL]
      return nil unless url
      URI.parse(url).host
    rescue URI::InvalidURIError, ArgumentError
      nil
    end

    # Lower-cases a host and drops a leading "www." so that
    # "www.Example.com" and "example.com" compare equal.
    def canonical(host)
      host && host.downcase.sub(/\Awww\./, "")
    end
  end

  # Tracks which results page is being examined.
  class Page
    attr_accessor :current_page

    def initialize
      @current_page = 1
    end

    def start_number # convert current_page into the number for the query URL
      (@current_page - 1) * 10
    end

    def next_page
      @current_page += 1
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the crawlfish gem.
module Crawlfish
  # Release number of the gem, as a dotted version string.
  VERSION = "0.0.2"
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawlfish
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - Dan Neumann
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-26 00:00:00 -06:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Crawls and scrapes search engine results pages.
36
+ email:
37
+ - danneumanntx@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - .idea/.name
47
+ - .idea/.rakeTasks
48
+ - .idea/crawlfish.iml
49
+ - .idea/dictionaries/dan.xml
50
+ - .idea/encodings.xml
51
+ - .idea/misc.xml
52
+ - .idea/modules.xml
53
+ - .idea/vcs.xml
54
+ - .idea/workspace.xml
55
+ - Gemfile
56
+ - Rakefile
57
+ - crawlfish.gemspec
58
+ - lib/crawlfish.rb
59
+ - lib/crawlfish/version.rb
60
+ has_rdoc: true
61
+ homepage: http://danneu.com
62
+ licenses: []
63
+
64
+ post_install_message:
65
+ rdoc_options: []
66
+
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ hash: 3
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ requirements: []
88
+
89
+ rubyforge_project: crawlfish
90
+ rubygems_version: 1.3.7
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Crawlfish the Search Engine Crawler
94
+ test_files: []
95
+