crawlfish 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
@@ -0,0 +1 @@
1
+ crawlfish
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <Settings><!--This file was automatically generated by Ruby plugin.
3
+ You are allowed to:
4
+ 1. Remove rake task
5
+ 2. Add existing rake tasks
6
+ To add existing rake tasks automatically delete this file and reload the project.
7
+ --><RakeGroup description="" fullCmd="" taksId="rake"><RakeTask description="Build crawlfish-0.0.1.gem into the pkg directory" fullCmd="build" taksId="build" /><RakeTask description="Build and install crawlfish-0.0.1.gem into system gems" fullCmd="install" taksId="install" /><RakeTask description="Create tag v0.0.1 and build and push crawlfish-0.0.1.gem to Rubygems" fullCmd="release" taksId="release" /></RakeGroup></Settings>
@@ -0,0 +1,10 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ <orderEntry type="library" scope="PROVIDED" name="[gem] bundler (v1.0.10, C:/RailsInstaller/Ruby1.8.7/lib/ruby/gems/1.8/gems/bundler-1.0.10)" level="application" />
8
+ </component>
9
+ </module>
10
+
@@ -0,0 +1,7 @@
1
+ <component name="ProjectDictionaryState">
2
+ <dictionary name="dan">
3
+ <words>
4
+ <w>crawlfish</w>
5
+ </words>
6
+ </dictionary>
7
+ </component>
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
4
+ </project>
5
+
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="DependencyValidationManager">
4
+ <option name="SKIP_IMPORT_STATEMENTS" value="false" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Ruby SDK 1.8.7-p330" project-jdk-type="RUBY_SDK" />
7
+ </project>
8
+
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/crawlfish.iml" filepath="$PROJECT_DIR$/.idea/crawlfish.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
9
+
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
7
+
@@ -0,0 +1,355 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="52814ec4-7729-4991-a410-d3f62df9d527" name="Default" comment="">
5
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.name" />
6
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.rakeTasks" />
7
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/crawlfish.iml" />
8
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.gitignore" />
9
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/dictionaries/dan.xml" />
10
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/encodings.xml" />
11
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/misc.xml" />
12
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/modules.xml" />
13
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/vcs.xml" />
14
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
15
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Gemfile" />
16
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Rakefile" />
17
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/crawlfish.gemspec" />
18
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish.rb" />
19
+ <change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish/version.rb" />
20
+ </list>
21
+ <ignored path="crawlfish.iws" />
22
+ <ignored path=".idea/workspace.xml" />
23
+ <option name="TRACKING_ENABLED" value="true" />
24
+ <option name="SHOW_DIALOG" value="false" />
25
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
26
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
27
+ <option name="LAST_RESOLUTION" value="IGNORE" />
28
+ </component>
29
+ <component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
30
+ <component name="CoverageDataManager" choice="3" />
31
+ <component name="CreatePatchCommitExecutor">
32
+ <option name="PATCH_PATH" value="" />
33
+ <option name="REVERSE_PATCH" value="false" />
34
+ </component>
35
+ <component name="DaemonCodeAnalyzer">
36
+ <disable_hints />
37
+ </component>
38
+ <component name="FavoritesManager">
39
+ <favorites_list name="crawlfish" />
40
+ </component>
41
+ <component name="FileEditorManager">
42
+ <leaf>
43
+ <file leaf-file-name="crawlfish.gemspec" pinned="false" current="false" current-in-tab="false">
44
+ <entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
45
+ <provider selected="true" editor-type-id="text-editor">
46
+ <state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
47
+ <folding />
48
+ </state>
49
+ </provider>
50
+ </entry>
51
+ </file>
52
+ <file leaf-file-name="version.rb" pinned="false" current="true" current-in-tab="true">
53
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
54
+ <provider selected="true" editor-type-id="text-editor">
55
+ <state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
56
+ <folding />
57
+ </state>
58
+ </provider>
59
+ </entry>
60
+ </file>
61
+ <file leaf-file-name="Gemfile" pinned="false" current="false" current-in-tab="false">
62
+ <entry file="file://$PROJECT_DIR$/Gemfile">
63
+ <provider selected="true" editor-type-id="text-editor">
64
+ <state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
65
+ <folding />
66
+ </state>
67
+ </provider>
68
+ </entry>
69
+ </file>
70
+ <file leaf-file-name="crawlfish.rb" pinned="false" current="false" current-in-tab="false">
71
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
72
+ <provider selected="true" editor-type-id="text-editor">
73
+ <state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
74
+ <folding />
75
+ </state>
76
+ </provider>
77
+ </entry>
78
+ </file>
79
+ </leaf>
80
+ </component>
81
+ <component name="FindManager">
82
+ <FindUsagesManager>
83
+ <setting name="OPEN_NEW_TAB" value="false" />
84
+ </FindUsagesManager>
85
+ </component>
86
+ <component name="Git.Settings">
87
+ <option name="CHECKOUT_INCLUDE_TAGS" value="false" />
88
+ <option name="UPDATE_CHANGES_POLICY" value="STASH" />
89
+ </component>
90
+ <component name="IdeDocumentHistory">
91
+ <option name="changedFiles">
92
+ <list>
93
+ <option value="$PROJECT_DIR$/crawlfish.gemspec" />
94
+ <option value="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
95
+ <option value="$PROJECT_DIR$/lib/crawlfish.rb" />
96
+ <option value="$PROJECT_DIR$/lib/crawlfish/version.rb" />
97
+ </list>
98
+ </option>
99
+ </component>
100
+ <component name="ProjectLevelVcsManager" settingsEditedManually="false">
101
+ <OptionsSetting value="true" id="Add" />
102
+ <OptionsSetting value="true" id="Remove" />
103
+ <OptionsSetting value="true" id="Checkout" />
104
+ <OptionsSetting value="true" id="Update" />
105
+ <OptionsSetting value="true" id="Status" />
106
+ <OptionsSetting value="true" id="Edit" />
107
+ <ConfirmationsSetting value="2" id="Add" />
108
+ <ConfirmationsSetting value="0" id="Remove" />
109
+ </component>
110
+ <component name="ProjectReloadState">
111
+ <option name="STATE" value="0" />
112
+ </component>
113
+ <component name="ProjectView">
114
+ <navigator currentView="ProjectPane" proportions="" version="1" splitterProportion="0.5">
115
+ <flattenPackages />
116
+ <showMembers />
117
+ <showModules />
118
+ <showLibraryContents />
119
+ <hideEmptyPackages />
120
+ <abbreviatePackageNames />
121
+ <autoscrollToSource />
122
+ <autoscrollFromSource />
123
+ <sortByType />
124
+ </navigator>
125
+ <panes>
126
+ <pane id="Favorites" />
127
+ <pane id="ProjectPane">
128
+ <subPane>
129
+ <PATH>
130
+ <PATH_ELEMENT>
131
+ <option name="myItemId" value="crawlfish" />
132
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
133
+ </PATH_ELEMENT>
134
+ </PATH>
135
+ <PATH>
136
+ <PATH_ELEMENT>
137
+ <option name="myItemId" value="crawlfish" />
138
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
139
+ </PATH_ELEMENT>
140
+ <PATH_ELEMENT>
141
+ <option name="myItemId" value="crawlfish" />
142
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
143
+ </PATH_ELEMENT>
144
+ </PATH>
145
+ <PATH>
146
+ <PATH_ELEMENT>
147
+ <option name="myItemId" value="crawlfish" />
148
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
149
+ </PATH_ELEMENT>
150
+ <PATH_ELEMENT>
151
+ <option name="myItemId" value="crawlfish" />
152
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
153
+ </PATH_ELEMENT>
154
+ <PATH_ELEMENT>
155
+ <option name="myItemId" value="lib" />
156
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
157
+ </PATH_ELEMENT>
158
+ </PATH>
159
+ <PATH>
160
+ <PATH_ELEMENT>
161
+ <option name="myItemId" value="crawlfish" />
162
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
163
+ </PATH_ELEMENT>
164
+ <PATH_ELEMENT>
165
+ <option name="myItemId" value="crawlfish" />
166
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
167
+ </PATH_ELEMENT>
168
+ <PATH_ELEMENT>
169
+ <option name="myItemId" value="lib" />
170
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
171
+ </PATH_ELEMENT>
172
+ <PATH_ELEMENT>
173
+ <option name="myItemId" value="crawlfish" />
174
+ <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
175
+ </PATH_ELEMENT>
176
+ </PATH>
177
+ </subPane>
178
+ </pane>
179
+ <pane id="Scope" />
180
+ </panes>
181
+ </component>
182
+ <component name="PropertiesComponent">
183
+ <property name="recentsLimit" value="5" />
184
+ </component>
185
+ <component name="RunManager" selected="Ruby.crawlfish">
186
+ <configuration default="false" name="crawlfish" type="RubyRunConfigurationType" factoryName="Ruby" temporary="true">
187
+ <module name="crawlfish" />
188
+ <RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
189
+ <RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="$PROJECT_DIR$/lib/crawlfish" />
190
+ <RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
191
+ <RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
192
+ <RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
193
+ <envs />
194
+ <EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
195
+ <EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
196
+ <RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
197
+ <RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
198
+ <RunnerSettings RunnerId="RubyRunner" />
199
+ <ConfigurationWrapper RunnerId="RubyRunner" />
200
+ <method />
201
+ </configuration>
202
+ <configuration default="true" type="RubyRunConfigurationType" factoryName="Ruby">
203
+ <module name="" />
204
+ <RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
205
+ <RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="" />
206
+ <RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
207
+ <RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
208
+ <RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
209
+ <envs />
210
+ <EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
211
+ <EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
212
+ <RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="" />
213
+ <RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
214
+ <method />
215
+ </configuration>
216
+ <configuration default="true" type="TestUnitRunConfigurationType" factoryName="Test::Unit/Shoulda">
217
+ <predefined_log_file id="RUBY_TESTUNIT" enabled="true" />
218
+ <module name="" />
219
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
220
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="WORK DIR" VALUE="" />
221
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="SHOULD_USE_SDK" VALUE="false" />
222
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="ALTERN_SDK_NAME" VALUE="" />
223
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="myPassParentEnvs" VALUE="true" />
224
+ <envs />
225
+ <EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
226
+ <EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
227
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TESTS_FOLDER_PATH" VALUE="" />
228
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_SCRIPT_PATH" VALUE="" />
229
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_CLASS_NAME" VALUE="" />
230
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_FILE_MASK" VALUE="" />
231
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_METHOD_NAME" VALUE="" />
232
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_TEST_TYPE" VALUE="TEST_SCRIPT" />
233
+ <RTEST_RUN_CONFIG_SETTINGS_ID NAME="INHERITANCE_CHECK_DISABLED" VALUE="false" />
234
+ <method />
235
+ </configuration>
236
+ <list size="1">
237
+ <item index="0" class="java.lang.String" itemvalue="Ruby.crawlfish" />
238
+ </list>
239
+ </component>
240
+ <component name="ShelveChangesManager" show_recycled="false" />
241
+ <component name="SvnConfiguration" maxAnnotateRevisions="500">
242
+ <option name="USER" value="" />
243
+ <option name="PASSWORD" value="" />
244
+ <option name="LAST_MERGED_REVISION" />
245
+ <option name="MERGE_DRY_RUN" value="false" />
246
+ <option name="MERGE_DIFF_USE_ANCESTRY" value="true" />
247
+ <option name="UPDATE_LOCK_ON_DEMAND" value="false" />
248
+ <option name="IGNORE_SPACES_IN_MERGE" value="false" />
249
+ <option name="DETECT_NESTED_COPIES" value="true" />
250
+ <option name="CHECK_NESTED_FOR_QUICK_MERGE" value="false" />
251
+ <option name="IGNORE_SPACES_IN_ANNOTATE" value="true" />
252
+ <option name="SHOW_MERGE_SOURCES_IN_ANNOTATE" value="true" />
253
+ <option name="FORCE_UPDATE" value="false" />
254
+ <configuration useDefault="true">C:\Users\dan\AppData\Roaming\Subversion</configuration>
255
+ <myIsUseDefaultProxy>false</myIsUseDefaultProxy>
256
+ </component>
257
+ <component name="TaskManager">
258
+ <task active="true" id="Default" summary="Default task">
259
+ <created>1298757413423</created>
260
+ <updated>1298757413423</updated>
261
+ </task>
262
+ <servers />
263
+ </component>
264
+ <component name="ToolWindowManager">
265
+ <frame x="-8" y="-8" width="1696" height="1026" extended-state="6" />
266
+ <editor active="true" />
267
+ <layout>
268
+ <window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
269
+ <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
270
+ <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="true" content_ui="tabs" />
271
+ <window_info id="Dependency Viewer" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
272
+ <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.12062726" sideWeight="0.67001116" order="0" side_tool="false" content_ui="tabs" />
273
+ <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
274
+ <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32998884" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
275
+ <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
276
+ <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
277
+ <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
278
+ <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
279
+ <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
280
+ <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
281
+ <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
282
+ <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
283
+ </layout>
284
+ </component>
285
+ <component name="VcsManagerConfiguration">
286
+ <option name="OFFER_MOVE_TO_ANOTHER_CHANGELIST_ON_PARTIAL_COMMIT" value="true" />
287
+ <option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="true" />
288
+ <option name="PERFORM_UPDATE_IN_BACKGROUND" value="true" />
289
+ <option name="PERFORM_COMMIT_IN_BACKGROUND" value="true" />
290
+ <option name="PERFORM_EDIT_IN_BACKGROUND" value="true" />
291
+ <option name="PERFORM_CHECKOUT_IN_BACKGROUND" value="true" />
292
+ <option name="PERFORM_ADD_REMOVE_IN_BACKGROUND" value="true" />
293
+ <option name="PERFORM_ROLLBACK_IN_BACKGROUND" value="false" />
294
+ <option name="CHECK_LOCALLY_CHANGED_CONFLICTS_IN_BACKGROUND" value="false" />
295
+ <option name="ENABLE_BACKGROUND_PROCESSES" value="false" />
296
+ <option name="CHANGED_ON_SERVER_INTERVAL" value="60" />
297
+ <option name="SHOW_ONLY_CHANGED_IN_SELECTION_DIFF" value="true" />
298
+ <option name="CHECK_COMMIT_MESSAGE_SPELLING" value="true" />
299
+ <option name="FORCE_NON_EMPTY_COMMENT" value="false" />
300
+ <option name="LAST_COMMIT_MESSAGE" />
301
+ <option name="MAKE_NEW_CHANGELIST_ACTIVE" value="true" />
302
+ <option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false" />
303
+ <option name="CHECK_FILES_UP_TO_DATE_BEFORE_COMMIT" value="false" />
304
+ <option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false" />
305
+ <option name="REFORMAT_BEFORE_FILE_COMMIT" value="false" />
306
+ <option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8" />
307
+ <option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5" />
308
+ <option name="ACTIVE_VCS_NAME" />
309
+ <option name="UPDATE_GROUP_BY_PACKAGES" value="false" />
310
+ <option name="UPDATE_GROUP_BY_CHANGELIST" value="false" />
311
+ <option name="SHOW_FILE_HISTORY_AS_TREE" value="false" />
312
+ <option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6" />
313
+ </component>
314
+ <component name="XDebuggerManager">
315
+ <breakpoint-manager />
316
+ </component>
317
+ <component name="editorHistoryManager">
318
+ <entry file="file://$PROJECT_DIR$/Rakefile">
319
+ <provider selected="true" editor-type-id="text-editor">
320
+ <state line="0" column="0" selection-start="0" selection-end="0" vertical-scroll-proportion="0.0">
321
+ <folding />
322
+ </state>
323
+ </provider>
324
+ </entry>
325
+ <entry file="file://$PROJECT_DIR$/Gemfile">
326
+ <provider selected="true" editor-type-id="text-editor">
327
+ <state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
328
+ <folding />
329
+ </state>
330
+ </provider>
331
+ </entry>
332
+ <entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
333
+ <provider selected="true" editor-type-id="text-editor">
334
+ <state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
335
+ <folding />
336
+ </state>
337
+ </provider>
338
+ </entry>
339
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
340
+ <provider selected="true" editor-type-id="text-editor">
341
+ <state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
342
+ <folding />
343
+ </state>
344
+ </provider>
345
+ </entry>
346
+ <entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
347
+ <provider selected="true" editor-type-id="text-editor">
348
+ <state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
349
+ <folding />
350
+ </state>
351
+ </provider>
352
+ </entry>
353
+ </component>
354
+ </project>
355
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org" # fetch gems over TLS rather than plain HTTP
2
+
3
+ # Specify your gem's dependencies in crawlfish.gemspec
4
+ gemspec
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks # registers Bundler's gem rake tasks (build, install, release)
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "crawlfish/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "crawlfish"
7
+ s.version = Crawlfish::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Dan Neumann"]
10
+ s.email = ["danneumanntx@gmail.com"]
11
+ s.homepage = "http://danneu.com"
12
+ s.summary = %q{Crawlfish the Search Engine Crawler}
13
+ s.description = %q{Crawls and scrapes search engine results pages.}
14
+
15
+ s.rubyforge_project = "crawlfish"
16
+
17
+ s.add_dependency "nokogiri"
18
+
19
+ s.files = `git ls-files`.split("\n")
20
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
21
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
22
+ s.require_paths = ["lib"]
23
+ end
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
+ module Crawlfish
5
+ class GoogleScraper
6
+ attr_accessor :website, :keyword, :user_agent, :start, :i, :position
7
+ def initialize(options)
8
+ @website = options[:website]
9
+ @keyword = options[:keyword]
10
+ @user_agent = "Windows IE 6"
11
+ @i = 1 # position (1-100)
12
+ @position = nil
13
+ @page = Page.new
14
+ end
15
+ def scrape
16
+ until @position or @page.current_page > 10
17
+ build_query
18
+ search_this_page
19
+ sleep 2000
20
+ next_page
21
+ end
22
+ @position ||= -1 # -1 if not found
23
+ {:position => @position, :measured_at => Time.now, :engine => "Google"}
24
+ end
25
+
26
+ # scrape helpers
27
+ def build_query
28
+ keyword = @keyword.split.join("+")
29
+ @url = "http://www.google.com/search?q=#{keyword}&start=#{@page.start_number}"
30
+ end
31
+ def search_this_page
32
+ doc = Nokogiri::HTML(open(@url, "User-Agent" => user_agent))
33
+ links = doc.xpath('//h3/a[contains(@class, "l")]')
34
+ # If links are empty, position is not found and the search ends
35
+ if links.empty?
36
+ @position = -1
37
+ return
38
+ end
39
+
40
+ host = URI::parse(URI::extract(result.url.to_s).first).host
41
+ if host == @domain
42
+ @position = result.rank
43
+ end
44
+ end
45
+ def next_page
46
+ @page.next_page
47
+ end
48
+
49
+ end
50
+ class Page
51
+ attr_accessor :current_page
52
+ def initialize
53
+ @current_page = 1
54
+ end
55
+ def start_number # convert current_page into the number for the query URL
56
+ (@current_page - 1) * 10
57
+ end
58
+ def next_page
59
+ @current_page += 1
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,3 @@
1
+ module Crawlfish
2
+ VERSION = "0.0.2" # gem version; crawlfish.gemspec assigns this to s.version
3
+ end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawlfish
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - Dan Neumann
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-26 00:00:00 -06:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Crawls and scrapes search engine results pages.
36
+ email:
37
+ - danneumanntx@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - .idea/.name
47
+ - .idea/.rakeTasks
48
+ - .idea/crawlfish.iml
49
+ - .idea/dictionaries/dan.xml
50
+ - .idea/encodings.xml
51
+ - .idea/misc.xml
52
+ - .idea/modules.xml
53
+ - .idea/vcs.xml
54
+ - .idea/workspace.xml
55
+ - Gemfile
56
+ - Rakefile
57
+ - crawlfish.gemspec
58
+ - lib/crawlfish.rb
59
+ - lib/crawlfish/version.rb
60
+ has_rdoc: true
61
+ homepage: http://danneu.com
62
+ licenses: []
63
+
64
+ post_install_message:
65
+ rdoc_options: []
66
+
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ hash: 3
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ requirements: []
88
+
89
+ rubyforge_project: crawlfish
90
+ rubygems_version: 1.3.7
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Crawlfish the Search Engine Crawler
94
+ test_files: []
95
+