crawlfish 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.idea/.name +1 -0
- data/.idea/.rakeTasks +7 -0
- data/.idea/crawlfish.iml +10 -0
- data/.idea/dictionaries/dan.xml +7 -0
- data/.idea/encodings.xml +5 -0
- data/.idea/misc.xml +8 -0
- data/.idea/modules.xml +9 -0
- data/.idea/vcs.xml +7 -0
- data/.idea/workspace.xml +355 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/crawlfish.gemspec +23 -0
- data/lib/crawlfish.rb +62 -0
- data/lib/crawlfish/version.rb +3 -0
- metadata +95 -0
data/.gitignore
ADDED
data/.idea/.name
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
crawlfish
|
data/.idea/.rakeTasks
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<Settings><!--This file was automatically generated by Ruby plugin.
|
|
3
|
+
You are allowed to:
|
|
4
|
+
1. Remove rake task
|
|
5
|
+
2. Add existing rake tasks
|
|
6
|
+
To add existing rake tasks automatically delete this file and reload the project.
|
|
7
|
+
--><RakeGroup description="" fullCmd="" taksId="rake"><RakeTask description="Build crawlfish-0.0.1.gem into the pkg directory" fullCmd="build" taksId="build" /><RakeTask description="Build and install crawlfish-0.0.1.gem into system gems" fullCmd="install" taksId="install" /><RakeTask description="Create tag v0.0.1 and build and push crawlfish-0.0.1.gem to Rubygems" fullCmd="release" taksId="release" /></RakeGroup></Settings>
|
data/.idea/crawlfish.iml
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
|
3
|
+
<component name="NewModuleRootManager">
|
|
4
|
+
<content url="file://$MODULE_DIR$" />
|
|
5
|
+
<orderEntry type="inheritedJdk" />
|
|
6
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
|
7
|
+
<orderEntry type="library" scope="PROVIDED" name="[gem] bundler (v1.0.10, C:/RailsInstaller/Ruby1.8.7/lib/ruby/gems/1.8/gems/bundler-1.0.10)" level="application" />
|
|
8
|
+
</component>
|
|
9
|
+
</module>
|
|
10
|
+
|
data/.idea/encodings.xml
ADDED
data/.idea/misc.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<project version="4">
|
|
3
|
+
<component name="DependencyValidationManager">
|
|
4
|
+
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
|
|
5
|
+
</component>
|
|
6
|
+
<component name="ProjectRootManager" version="2" project-jdk-name="Ruby SDK 1.8.7-p330" project-jdk-type="RUBY_SDK" />
|
|
7
|
+
</project>
|
|
8
|
+
|
data/.idea/modules.xml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<project version="4">
|
|
3
|
+
<component name="ProjectModuleManager">
|
|
4
|
+
<modules>
|
|
5
|
+
<module fileurl="file://$PROJECT_DIR$/.idea/crawlfish.iml" filepath="$PROJECT_DIR$/.idea/crawlfish.iml" />
|
|
6
|
+
</modules>
|
|
7
|
+
</component>
|
|
8
|
+
</project>
|
|
9
|
+
|
data/.idea/vcs.xml
ADDED
data/.idea/workspace.xml
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<project version="4">
|
|
3
|
+
<component name="ChangeListManager">
|
|
4
|
+
<list default="true" id="52814ec4-7729-4991-a410-d3f62df9d527" name="Default" comment="">
|
|
5
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.name" />
|
|
6
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/.rakeTasks" />
|
|
7
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/crawlfish.iml" />
|
|
8
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.gitignore" />
|
|
9
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/dictionaries/dan.xml" />
|
|
10
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/encodings.xml" />
|
|
11
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/misc.xml" />
|
|
12
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/modules.xml" />
|
|
13
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/vcs.xml" />
|
|
14
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
|
15
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Gemfile" />
|
|
16
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/Rakefile" />
|
|
17
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/crawlfish.gemspec" />
|
|
18
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish.rb" />
|
|
19
|
+
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/lib/crawlfish/version.rb" />
|
|
20
|
+
</list>
|
|
21
|
+
<ignored path="crawlfish.iws" />
|
|
22
|
+
<ignored path=".idea/workspace.xml" />
|
|
23
|
+
<option name="TRACKING_ENABLED" value="true" />
|
|
24
|
+
<option name="SHOW_DIALOG" value="false" />
|
|
25
|
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
|
26
|
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
|
27
|
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
|
28
|
+
</component>
|
|
29
|
+
<component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
|
|
30
|
+
<component name="CoverageDataManager" choice="3" />
|
|
31
|
+
<component name="CreatePatchCommitExecutor">
|
|
32
|
+
<option name="PATCH_PATH" value="" />
|
|
33
|
+
<option name="REVERSE_PATCH" value="false" />
|
|
34
|
+
</component>
|
|
35
|
+
<component name="DaemonCodeAnalyzer">
|
|
36
|
+
<disable_hints />
|
|
37
|
+
</component>
|
|
38
|
+
<component name="FavoritesManager">
|
|
39
|
+
<favorites_list name="crawlfish" />
|
|
40
|
+
</component>
|
|
41
|
+
<component name="FileEditorManager">
|
|
42
|
+
<leaf>
|
|
43
|
+
<file leaf-file-name="crawlfish.gemspec" pinned="false" current="false" current-in-tab="false">
|
|
44
|
+
<entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
|
|
45
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
46
|
+
<state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
|
|
47
|
+
<folding />
|
|
48
|
+
</state>
|
|
49
|
+
</provider>
|
|
50
|
+
</entry>
|
|
51
|
+
</file>
|
|
52
|
+
<file leaf-file-name="version.rb" pinned="false" current="true" current-in-tab="true">
|
|
53
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
|
|
54
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
55
|
+
<state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
|
|
56
|
+
<folding />
|
|
57
|
+
</state>
|
|
58
|
+
</provider>
|
|
59
|
+
</entry>
|
|
60
|
+
</file>
|
|
61
|
+
<file leaf-file-name="Gemfile" pinned="false" current="false" current-in-tab="false">
|
|
62
|
+
<entry file="file://$PROJECT_DIR$/Gemfile">
|
|
63
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
64
|
+
<state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
|
|
65
|
+
<folding />
|
|
66
|
+
</state>
|
|
67
|
+
</provider>
|
|
68
|
+
</entry>
|
|
69
|
+
</file>
|
|
70
|
+
<file leaf-file-name="crawlfish.rb" pinned="false" current="false" current-in-tab="false">
|
|
71
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
|
|
72
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
73
|
+
<state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
|
|
74
|
+
<folding />
|
|
75
|
+
</state>
|
|
76
|
+
</provider>
|
|
77
|
+
</entry>
|
|
78
|
+
</file>
|
|
79
|
+
</leaf>
|
|
80
|
+
</component>
|
|
81
|
+
<component name="FindManager">
|
|
82
|
+
<FindUsagesManager>
|
|
83
|
+
<setting name="OPEN_NEW_TAB" value="false" />
|
|
84
|
+
</FindUsagesManager>
|
|
85
|
+
</component>
|
|
86
|
+
<component name="Git.Settings">
|
|
87
|
+
<option name="CHECKOUT_INCLUDE_TAGS" value="false" />
|
|
88
|
+
<option name="UPDATE_CHANGES_POLICY" value="STASH" />
|
|
89
|
+
</component>
|
|
90
|
+
<component name="IdeDocumentHistory">
|
|
91
|
+
<option name="changedFiles">
|
|
92
|
+
<list>
|
|
93
|
+
<option value="$PROJECT_DIR$/crawlfish.gemspec" />
|
|
94
|
+
<option value="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
|
|
95
|
+
<option value="$PROJECT_DIR$/lib/crawlfish.rb" />
|
|
96
|
+
<option value="$PROJECT_DIR$/lib/crawlfish/version.rb" />
|
|
97
|
+
</list>
|
|
98
|
+
</option>
|
|
99
|
+
</component>
|
|
100
|
+
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
|
|
101
|
+
<OptionsSetting value="true" id="Add" />
|
|
102
|
+
<OptionsSetting value="true" id="Remove" />
|
|
103
|
+
<OptionsSetting value="true" id="Checkout" />
|
|
104
|
+
<OptionsSetting value="true" id="Update" />
|
|
105
|
+
<OptionsSetting value="true" id="Status" />
|
|
106
|
+
<OptionsSetting value="true" id="Edit" />
|
|
107
|
+
<ConfirmationsSetting value="2" id="Add" />
|
|
108
|
+
<ConfirmationsSetting value="0" id="Remove" />
|
|
109
|
+
</component>
|
|
110
|
+
<component name="ProjectReloadState">
|
|
111
|
+
<option name="STATE" value="0" />
|
|
112
|
+
</component>
|
|
113
|
+
<component name="ProjectView">
|
|
114
|
+
<navigator currentView="ProjectPane" proportions="" version="1" splitterProportion="0.5">
|
|
115
|
+
<flattenPackages />
|
|
116
|
+
<showMembers />
|
|
117
|
+
<showModules />
|
|
118
|
+
<showLibraryContents />
|
|
119
|
+
<hideEmptyPackages />
|
|
120
|
+
<abbreviatePackageNames />
|
|
121
|
+
<autoscrollToSource />
|
|
122
|
+
<autoscrollFromSource />
|
|
123
|
+
<sortByType />
|
|
124
|
+
</navigator>
|
|
125
|
+
<panes>
|
|
126
|
+
<pane id="Favorites" />
|
|
127
|
+
<pane id="ProjectPane">
|
|
128
|
+
<subPane>
|
|
129
|
+
<PATH>
|
|
130
|
+
<PATH_ELEMENT>
|
|
131
|
+
<option name="myItemId" value="crawlfish" />
|
|
132
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
|
133
|
+
</PATH_ELEMENT>
|
|
134
|
+
</PATH>
|
|
135
|
+
<PATH>
|
|
136
|
+
<PATH_ELEMENT>
|
|
137
|
+
<option name="myItemId" value="crawlfish" />
|
|
138
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
|
139
|
+
</PATH_ELEMENT>
|
|
140
|
+
<PATH_ELEMENT>
|
|
141
|
+
<option name="myItemId" value="crawlfish" />
|
|
142
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
|
143
|
+
</PATH_ELEMENT>
|
|
144
|
+
</PATH>
|
|
145
|
+
<PATH>
|
|
146
|
+
<PATH_ELEMENT>
|
|
147
|
+
<option name="myItemId" value="crawlfish" />
|
|
148
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
|
149
|
+
</PATH_ELEMENT>
|
|
150
|
+
<PATH_ELEMENT>
|
|
151
|
+
<option name="myItemId" value="crawlfish" />
|
|
152
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
|
153
|
+
</PATH_ELEMENT>
|
|
154
|
+
<PATH_ELEMENT>
|
|
155
|
+
<option name="myItemId" value="lib" />
|
|
156
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
|
157
|
+
</PATH_ELEMENT>
|
|
158
|
+
</PATH>
|
|
159
|
+
<PATH>
|
|
160
|
+
<PATH_ELEMENT>
|
|
161
|
+
<option name="myItemId" value="crawlfish" />
|
|
162
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
|
163
|
+
</PATH_ELEMENT>
|
|
164
|
+
<PATH_ELEMENT>
|
|
165
|
+
<option name="myItemId" value="crawlfish" />
|
|
166
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
|
167
|
+
</PATH_ELEMENT>
|
|
168
|
+
<PATH_ELEMENT>
|
|
169
|
+
<option name="myItemId" value="lib" />
|
|
170
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
|
171
|
+
</PATH_ELEMENT>
|
|
172
|
+
<PATH_ELEMENT>
|
|
173
|
+
<option name="myItemId" value="crawlfish" />
|
|
174
|
+
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
|
175
|
+
</PATH_ELEMENT>
|
|
176
|
+
</PATH>
|
|
177
|
+
</subPane>
|
|
178
|
+
</pane>
|
|
179
|
+
<pane id="Scope" />
|
|
180
|
+
</panes>
|
|
181
|
+
</component>
|
|
182
|
+
<component name="PropertiesComponent">
|
|
183
|
+
<property name="recentsLimit" value="5" />
|
|
184
|
+
</component>
|
|
185
|
+
<component name="RunManager" selected="Ruby.crawlfish">
|
|
186
|
+
<configuration default="false" name="crawlfish" type="RubyRunConfigurationType" factoryName="Ruby" temporary="true">
|
|
187
|
+
<module name="crawlfish" />
|
|
188
|
+
<RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
|
|
189
|
+
<RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="$PROJECT_DIR$/lib/crawlfish" />
|
|
190
|
+
<RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
|
|
191
|
+
<RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
|
|
192
|
+
<RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
|
|
193
|
+
<envs />
|
|
194
|
+
<EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
|
|
195
|
+
<EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
|
|
196
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="$PROJECT_DIR$/lib/crawlfish/crawlfish.rb" />
|
|
197
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
|
|
198
|
+
<RunnerSettings RunnerId="RubyRunner" />
|
|
199
|
+
<ConfigurationWrapper RunnerId="RubyRunner" />
|
|
200
|
+
<method />
|
|
201
|
+
</configuration>
|
|
202
|
+
<configuration default="true" type="RubyRunConfigurationType" factoryName="Ruby">
|
|
203
|
+
<module name="" />
|
|
204
|
+
<RUBY_RUN_CONFIG NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
|
|
205
|
+
<RUBY_RUN_CONFIG NAME="WORK DIR" VALUE="" />
|
|
206
|
+
<RUBY_RUN_CONFIG NAME="SHOULD_USE_SDK" VALUE="false" />
|
|
207
|
+
<RUBY_RUN_CONFIG NAME="ALTERN_SDK_NAME" VALUE="" />
|
|
208
|
+
<RUBY_RUN_CONFIG NAME="myPassParentEnvs" VALUE="true" />
|
|
209
|
+
<envs />
|
|
210
|
+
<EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
|
|
211
|
+
<EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
|
|
212
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_PATH" VALUE="" />
|
|
213
|
+
<RUBY_RUN_CONFIG NAME="SCRIPT_ARGS" VALUE="" />
|
|
214
|
+
<method />
|
|
215
|
+
</configuration>
|
|
216
|
+
<configuration default="true" type="TestUnitRunConfigurationType" factoryName="Test::Unit/Shoulda">
|
|
217
|
+
<predefined_log_file id="RUBY_TESTUNIT" enabled="true" />
|
|
218
|
+
<module name="" />
|
|
219
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="RUBY_ARGS" VALUE="-e $stdout.sync=true;$stderr.sync=true;load($0=ARGV.shift)" />
|
|
220
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="WORK DIR" VALUE="" />
|
|
221
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="SHOULD_USE_SDK" VALUE="false" />
|
|
222
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="ALTERN_SDK_NAME" VALUE="" />
|
|
223
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="myPassParentEnvs" VALUE="true" />
|
|
224
|
+
<envs />
|
|
225
|
+
<EXTENSION ID="BundlerRunConfigurationExtension" bundleExecEnabled="false" />
|
|
226
|
+
<EXTENSION ID="RubyCoverageRunConfigurationExtension" enabled="false" track_test_folders="true" runner="rcov" />
|
|
227
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TESTS_FOLDER_PATH" VALUE="" />
|
|
228
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_SCRIPT_PATH" VALUE="" />
|
|
229
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_CLASS_NAME" VALUE="" />
|
|
230
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_FILE_MASK" VALUE="" />
|
|
231
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_METHOD_NAME" VALUE="" />
|
|
232
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="TEST_TEST_TYPE" VALUE="TEST_SCRIPT" />
|
|
233
|
+
<RTEST_RUN_CONFIG_SETTINGS_ID NAME="INHERITANCE_CHECK_DISABLED" VALUE="false" />
|
|
234
|
+
<method />
|
|
235
|
+
</configuration>
|
|
236
|
+
<list size="1">
|
|
237
|
+
<item index="0" class="java.lang.String" itemvalue="Ruby.crawlfish" />
|
|
238
|
+
</list>
|
|
239
|
+
</component>
|
|
240
|
+
<component name="ShelveChangesManager" show_recycled="false" />
|
|
241
|
+
<component name="SvnConfiguration" maxAnnotateRevisions="500">
|
|
242
|
+
<option name="USER" value="" />
|
|
243
|
+
<option name="PASSWORD" value="" />
|
|
244
|
+
<option name="LAST_MERGED_REVISION" />
|
|
245
|
+
<option name="MERGE_DRY_RUN" value="false" />
|
|
246
|
+
<option name="MERGE_DIFF_USE_ANCESTRY" value="true" />
|
|
247
|
+
<option name="UPDATE_LOCK_ON_DEMAND" value="false" />
|
|
248
|
+
<option name="IGNORE_SPACES_IN_MERGE" value="false" />
|
|
249
|
+
<option name="DETECT_NESTED_COPIES" value="true" />
|
|
250
|
+
<option name="CHECK_NESTED_FOR_QUICK_MERGE" value="false" />
|
|
251
|
+
<option name="IGNORE_SPACES_IN_ANNOTATE" value="true" />
|
|
252
|
+
<option name="SHOW_MERGE_SOURCES_IN_ANNOTATE" value="true" />
|
|
253
|
+
<option name="FORCE_UPDATE" value="false" />
|
|
254
|
+
<configuration useDefault="true">C:\Users\dan\AppData\Roaming\Subversion</configuration>
|
|
255
|
+
<myIsUseDefaultProxy>false</myIsUseDefaultProxy>
|
|
256
|
+
</component>
|
|
257
|
+
<component name="TaskManager">
|
|
258
|
+
<task active="true" id="Default" summary="Default task">
|
|
259
|
+
<created>1298757413423</created>
|
|
260
|
+
<updated>1298757413423</updated>
|
|
261
|
+
</task>
|
|
262
|
+
<servers />
|
|
263
|
+
</component>
|
|
264
|
+
<component name="ToolWindowManager">
|
|
265
|
+
<frame x="-8" y="-8" width="1696" height="1026" extended-state="6" />
|
|
266
|
+
<editor active="true" />
|
|
267
|
+
<layout>
|
|
268
|
+
<window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
|
269
|
+
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
|
|
270
|
+
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="true" content_ui="tabs" />
|
|
271
|
+
<window_info id="Dependency Viewer" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
|
272
|
+
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.12062726" sideWeight="0.67001116" order="0" side_tool="false" content_ui="tabs" />
|
|
273
|
+
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
|
274
|
+
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32998884" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
|
275
|
+
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
|
276
|
+
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
|
|
277
|
+
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
|
278
|
+
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
|
279
|
+
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
|
280
|
+
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
|
281
|
+
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
|
|
282
|
+
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
|
|
283
|
+
</layout>
|
|
284
|
+
</component>
|
|
285
|
+
<component name="VcsManagerConfiguration">
|
|
286
|
+
<option name="OFFER_MOVE_TO_ANOTHER_CHANGELIST_ON_PARTIAL_COMMIT" value="true" />
|
|
287
|
+
<option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="true" />
|
|
288
|
+
<option name="PERFORM_UPDATE_IN_BACKGROUND" value="true" />
|
|
289
|
+
<option name="PERFORM_COMMIT_IN_BACKGROUND" value="true" />
|
|
290
|
+
<option name="PERFORM_EDIT_IN_BACKGROUND" value="true" />
|
|
291
|
+
<option name="PERFORM_CHECKOUT_IN_BACKGROUND" value="true" />
|
|
292
|
+
<option name="PERFORM_ADD_REMOVE_IN_BACKGROUND" value="true" />
|
|
293
|
+
<option name="PERFORM_ROLLBACK_IN_BACKGROUND" value="false" />
|
|
294
|
+
<option name="CHECK_LOCALLY_CHANGED_CONFLICTS_IN_BACKGROUND" value="false" />
|
|
295
|
+
<option name="ENABLE_BACKGROUND_PROCESSES" value="false" />
|
|
296
|
+
<option name="CHANGED_ON_SERVER_INTERVAL" value="60" />
|
|
297
|
+
<option name="SHOW_ONLY_CHANGED_IN_SELECTION_DIFF" value="true" />
|
|
298
|
+
<option name="CHECK_COMMIT_MESSAGE_SPELLING" value="true" />
|
|
299
|
+
<option name="FORCE_NON_EMPTY_COMMENT" value="false" />
|
|
300
|
+
<option name="LAST_COMMIT_MESSAGE" />
|
|
301
|
+
<option name="MAKE_NEW_CHANGELIST_ACTIVE" value="true" />
|
|
302
|
+
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false" />
|
|
303
|
+
<option name="CHECK_FILES_UP_TO_DATE_BEFORE_COMMIT" value="false" />
|
|
304
|
+
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false" />
|
|
305
|
+
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false" />
|
|
306
|
+
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8" />
|
|
307
|
+
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5" />
|
|
308
|
+
<option name="ACTIVE_VCS_NAME" />
|
|
309
|
+
<option name="UPDATE_GROUP_BY_PACKAGES" value="false" />
|
|
310
|
+
<option name="UPDATE_GROUP_BY_CHANGELIST" value="false" />
|
|
311
|
+
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false" />
|
|
312
|
+
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6" />
|
|
313
|
+
</component>
|
|
314
|
+
<component name="XDebuggerManager">
|
|
315
|
+
<breakpoint-manager />
|
|
316
|
+
</component>
|
|
317
|
+
<component name="editorHistoryManager">
|
|
318
|
+
<entry file="file://$PROJECT_DIR$/Rakefile">
|
|
319
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
320
|
+
<state line="0" column="0" selection-start="0" selection-end="0" vertical-scroll-proportion="0.0">
|
|
321
|
+
<folding />
|
|
322
|
+
</state>
|
|
323
|
+
</provider>
|
|
324
|
+
</entry>
|
|
325
|
+
<entry file="file://$PROJECT_DIR$/Gemfile">
|
|
326
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
327
|
+
<state line="4" column="0" selection-start="93" selection-end="93" vertical-scroll-proportion="0.0">
|
|
328
|
+
<folding />
|
|
329
|
+
</state>
|
|
330
|
+
</provider>
|
|
331
|
+
</entry>
|
|
332
|
+
<entry file="file://$PROJECT_DIR$/crawlfish.gemspec">
|
|
333
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
334
|
+
<state line="10" column="21" selection-start="335" selection-end="335" vertical-scroll-proportion="0.0">
|
|
335
|
+
<folding />
|
|
336
|
+
</state>
|
|
337
|
+
</provider>
|
|
338
|
+
</entry>
|
|
339
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish.rb">
|
|
340
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
341
|
+
<state line="61" column="3" selection-start="1587" selection-end="1587" vertical-scroll-proportion="0.0">
|
|
342
|
+
<folding />
|
|
343
|
+
</state>
|
|
344
|
+
</provider>
|
|
345
|
+
</entry>
|
|
346
|
+
<entry file="file://$PROJECT_DIR$/lib/crawlfish/version.rb">
|
|
347
|
+
<provider selected="true" editor-type-id="text-editor">
|
|
348
|
+
<state line="1" column="18" selection-start="35" selection-end="35" vertical-scroll-proportion="0.01724138">
|
|
349
|
+
<folding />
|
|
350
|
+
</state>
|
|
351
|
+
</provider>
|
|
352
|
+
</entry>
|
|
353
|
+
</component>
|
|
354
|
+
</project>
|
|
355
|
+
|
data/Gemfile
ADDED
data/Rakefile
ADDED
data/crawlfish.gemspec
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for crawlfish.
#
# The version number is read from lib/crawlfish/version.rb, so lib/ is pushed
# onto the load path before that file is required.
$:.push File.expand_path("../lib", __FILE__)
require "crawlfish/version"

Gem::Specification.new do |s|
  s.name        = "crawlfish"
  s.version     = Crawlfish::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Dan Neumann"]
  s.email       = ["danneumanntx@gmail.com"]
  s.homepage    = "http://danneu.com"
  s.summary     = %q{Crawlfish the Search Engine Crawler}
  s.description = %q{Crawls and scrapes search engine results pages.}

  s.rubyforge_project = "crawlfish"

  s.add_dependency "nokogiri"

  # Package what git tracks, minus the IDE project directory: .idea/ holds
  # editor state with machine-specific paths and must not ship in the gem.
  s.files         = `git ls-files`.split("\n").reject { |path| path =~ %r{\A\.idea/} }
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name; RubyGems resolves them against bindir.
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/lib/crawlfish.rb
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'open-uri'
|
|
3
|
+
|
|
4
|
+
module Crawlfish
|
|
5
|
+
# Looks up the rank of a website in Google's results for a keyword by walking
# the result pages one at a time.
class GoogleScraper
  # CGI.escape is used to percent-encode the keyword; loaded here so the class
  # does not depend on another file having required it.
  require 'cgi'

  # Number of result pages inspected before the search gives up.
  MAX_PAGES = 10
  # Pause between two page requests, in seconds (Kernel#sleep takes seconds).
  PAGE_DELAY = 2

  attr_accessor :website, :keyword, :user_agent, :start, :i, :position

  # options - Hash read for two keys:
  #           :website - site whose rank is wanted (URL or bare host name)
  #           :keyword - search phrase
  def initialize(options)
    @website    = options[:website]
    @keyword    = options[:keyword]
    @user_agent = "Windows IE 6"
    @i          = 1   # rank of the next result to be inspected (1-100)
    @position   = nil # rank of the website once found, -1 when the search ended without it
    @page       = Page.new
  end

  # Runs the search until the website is found, a page comes back without
  # result links, or MAX_PAGES pages have been inspected.
  #
  # Returns a Hash with :position (-1 when not found), :measured_at and :engine.
  def scrape
    until @position or @page.current_page > MAX_PAGES
      build_query
      search_this_page
      break if @position # search is over, no need to pause or advance
      sleep PAGE_DELAY
      next_page
    end
    @position ||= -1 # -1 if not found
    {:position => @position, :measured_at => Time.now, :engine => "Google"}
  end

  # scrape helpers

  # Builds and stores the search URL for the current page.
  # Words are percent-encoded individually and joined with "+", so a keyword
  # containing characters such as "&" or "#" cannot corrupt the query string.
  #
  # Returns the URL String.
  def build_query
    keyword = @keyword.to_s.split.map { |word| CGI.escape(word) }.join("+")
    @url = "http://www.google.com/search?q=#{keyword}&start=#{@page.start_number}"
  end

  # Fetches the current result page and records the website's rank if one of
  # the result links points at it. A page without result links ends the
  # search with position -1.
  def search_this_page
    # URI#open (from open-uri) rather than Kernel#open: recent Ruby versions
    # no longer let Kernel#open fetch URLs.
    doc = Nokogiri::HTML(URI.parse(@url).open("User-Agent" => user_agent))
    links = doc.xpath('//h3/a[contains(@class, "l")]')
    # If links are empty, position is not found and the search ends
    if links.empty?
      @position = -1
      return
    end

    record_position(links.map { |link| link["href"] })
  end

  # Advances to the next result page.
  def next_page
    @page.next_page
  end

  private

  # Compares each result link with the website, in page order. On a match
  # the running rank @i becomes the position; otherwise @i moves on to the
  # next result.
  #
  # hrefs - Array of href Strings taken from the result links.
  #
  # Returns the position, or nil when no link matched.
  def record_position(hrefs)
    target = host_of(@website)
    hrefs.each do |href|
      if target && host_of(href) == target
        @position = @i
        break
      end
      @i += 1
    end
    @position
  end

  # Extracts a comparable host name from a URL, from a string embedding one
  # (e.g. a redirect link carrying the target as a parameter), or from a
  # bare host name. Hosts are lower-cased and a leading "www." is dropped so
  # "www.example.com" and "example.com" count as the same site.
  #
  # Returns the host String, or nil when none can be determined.
  def host_of(address)
    text = address.to_s.strip
    return nil if text.empty?

    candidate = text[%r{https?://[^\s"'<>]+}i] || "http://#{text}"
    host = URI.parse(candidate).host
    return nil if host.nil? || host.empty?

    host.downcase.sub(/\Awww\./, "")
  rescue URI::InvalidURIError
    nil
  end
end
|
|
50
|
+
# Tracks which results page is being viewed and converts it into the offset
# used in the query URL.
class Page
  attr_accessor :current_page

  # A new Page starts at the first results page.
  def initialize
    self.current_page = 1
  end

  # Offset for the query URL: ten results for every page before this one.
  def start_number
    pages_before = current_page - 1
    pages_before * 10
  end

  # Moves to the following page and returns its number.
  def next_page
    self.current_page = current_page + 1
  end
end
|
|
62
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: crawlfish
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 27
|
|
5
|
+
prerelease: false
|
|
6
|
+
segments:
|
|
7
|
+
- 0
|
|
8
|
+
- 0
|
|
9
|
+
- 2
|
|
10
|
+
version: 0.0.2
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- Dan Neumann
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2011-02-26 00:00:00 -06:00
|
|
19
|
+
default_executable:
|
|
20
|
+
dependencies:
|
|
21
|
+
- !ruby/object:Gem::Dependency
|
|
22
|
+
name: nokogiri
|
|
23
|
+
prerelease: false
|
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ">="
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
hash: 3
|
|
30
|
+
segments:
|
|
31
|
+
- 0
|
|
32
|
+
version: "0"
|
|
33
|
+
type: :runtime
|
|
34
|
+
version_requirements: *id001
|
|
35
|
+
description: Crawls and scrapes search engine results pages.
|
|
36
|
+
email:
|
|
37
|
+
- danneumanntx@gmail.com
|
|
38
|
+
executables: []
|
|
39
|
+
|
|
40
|
+
extensions: []
|
|
41
|
+
|
|
42
|
+
extra_rdoc_files: []
|
|
43
|
+
|
|
44
|
+
files:
|
|
45
|
+
- .gitignore
|
|
46
|
+
- .idea/.name
|
|
47
|
+
- .idea/.rakeTasks
|
|
48
|
+
- .idea/crawlfish.iml
|
|
49
|
+
- .idea/dictionaries/dan.xml
|
|
50
|
+
- .idea/encodings.xml
|
|
51
|
+
- .idea/misc.xml
|
|
52
|
+
- .idea/modules.xml
|
|
53
|
+
- .idea/vcs.xml
|
|
54
|
+
- .idea/workspace.xml
|
|
55
|
+
- Gemfile
|
|
56
|
+
- Rakefile
|
|
57
|
+
- crawlfish.gemspec
|
|
58
|
+
- lib/crawlfish.rb
|
|
59
|
+
- lib/crawlfish/version.rb
|
|
60
|
+
has_rdoc: true
|
|
61
|
+
homepage: http://danneu.com
|
|
62
|
+
licenses: []
|
|
63
|
+
|
|
64
|
+
post_install_message:
|
|
65
|
+
rdoc_options: []
|
|
66
|
+
|
|
67
|
+
require_paths:
|
|
68
|
+
- lib
|
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
70
|
+
none: false
|
|
71
|
+
requirements:
|
|
72
|
+
- - ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
hash: 3
|
|
75
|
+
segments:
|
|
76
|
+
- 0
|
|
77
|
+
version: "0"
|
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
|
+
none: false
|
|
80
|
+
requirements:
|
|
81
|
+
- - ">="
|
|
82
|
+
- !ruby/object:Gem::Version
|
|
83
|
+
hash: 3
|
|
84
|
+
segments:
|
|
85
|
+
- 0
|
|
86
|
+
version: "0"
|
|
87
|
+
requirements: []
|
|
88
|
+
|
|
89
|
+
rubyforge_project: crawlfish
|
|
90
|
+
rubygems_version: 1.3.7
|
|
91
|
+
signing_key:
|
|
92
|
+
specification_version: 3
|
|
93
|
+
summary: Crawlfish the Search Engine Crawler
|
|
94
|
+
test_files: []
|
|
95
|
+
|