is_crawler 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +8 -2
- data/lib/crawler.rb +23 -19
- data/lib/is_crawler/version.rb +1 -1
- data/spec/lib/crawler_spec.rb +40 -0
- metadata +3 -3
data/README.md
CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
|
|
19
19
|
|
20
20
|
## Usage
|
21
21
|
|
22
|
-
You can use the `is_crawler?` method with just a user agent string to determine if the that string matches *any*
|
22
|
+
You can use the `is_crawler?` method with just a user agent string to determine if that string matches *any* known crawler, like so:
|
23
23
|
|
24
24
|
class MyController < ActionController::Base
|
25
25
|
include IsCrawler
|
@@ -32,7 +32,13 @@ You can use the `is_crawler?` method with just a user agent string to determine
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
...or provide one or more crawlers `is_crawler?("Some User Agent/1.0", :facebook, :google)`
|
35
|
+
...or provide one or more crawlers to find out if the string matches *specific* crawlers: `is_crawler?("Some User Agent/1.0", :facebook, :google)`
|
36
|
+
|
37
|
+
You can also define custom crawlers like this:
|
38
|
+
|
39
|
+
Crawler::CUSTOM << Crawler.new(:custom_crawler_name, "string that is always present in crawler requests")
|
40
|
+
|
41
|
+
That's it!
|
36
42
|
|
37
43
|
## Contributing
|
38
44
|
|
data/lib/crawler.rb
CHANGED
@@ -1,25 +1,29 @@
|
|
1
1
|
class Crawler < Struct.new(:name, :ua_string)
|
2
|
+
DEFAULT = [
|
3
|
+
Crawler.new(:addthis, "AddThis.com"),
|
4
|
+
Crawler.new(:alexa, "ia_archiver"),
|
5
|
+
Crawler.new(:archive_org, "archive.org_bot"),
|
6
|
+
Crawler.new(:bing, "bingbot"),
|
7
|
+
Crawler.new(:bitly, "bitlybot"),
|
8
|
+
Crawler.new(:exabot, "Exabot"),
|
9
|
+
Crawler.new(:facebook, "facebookexternalhit"),
|
10
|
+
Crawler.new(:flipboard, "FlipboardProxy"),
|
11
|
+
Crawler.new(:google, "Googlebot"),
|
12
|
+
Crawler.new(:google_web_preview, "Google Web Preview"),
|
13
|
+
Crawler.new(:msn, "MSNBot"),
|
14
|
+
Crawler.new(:openwebspider, "OpenWebSpider"),
|
15
|
+
Crawler.new(:technorati, "Technoratibot"),
|
16
|
+
Crawler.new(:twitter, "Twitterbot"),
|
17
|
+
Crawler.new(:yahoo, "Yahoo! Slurp"),
|
18
|
+
Crawler.new(:yahoo_jp, "Y!J"),
|
19
|
+
Crawler.new(:yandex, "Yandex")
|
20
|
+
].freeze
|
21
|
+
|
22
|
+
CUSTOM = []
|
23
|
+
|
2
24
|
class << self
|
3
25
|
def all
|
4
|
-
|
5
|
-
Crawler.new(:addthis, "AddThis.com"),
|
6
|
-
Crawler.new(:alexa, "ia_archiver"),
|
7
|
-
Crawler.new(:archive_org, "archive.org_bot"),
|
8
|
-
Crawler.new(:bing, "bingbot"),
|
9
|
-
Crawler.new(:bitly, "bitlybot"),
|
10
|
-
Crawler.new(:exabot, "Exabot"),
|
11
|
-
Crawler.new(:facebook, "facebookexternalhit"),
|
12
|
-
Crawler.new(:flipboard, "FlipboardProxy"),
|
13
|
-
Crawler.new(:google, "Googlebot"),
|
14
|
-
Crawler.new(:google_web_preview, "Google Web Preview"),
|
15
|
-
Crawler.new(:msn, "MSNBot"),
|
16
|
-
Crawler.new(:openwebspider, "OpenWebSpider"),
|
17
|
-
Crawler.new(:technorati, "Technoratibot"),
|
18
|
-
Crawler.new(:twitter, "Twitterbot"),
|
19
|
-
Crawler.new(:yahoo, "Yahoo! Slurp"),
|
20
|
-
Crawler.new(:yahoo_jp, "Y!J"),
|
21
|
-
Crawler.new(:yandex, "Yandex")
|
22
|
-
]
|
26
|
+
DEFAULT + CUSTOM
|
23
27
|
end
|
24
28
|
|
25
29
|
def matches_any? user_agent
|
data/lib/is_crawler/version.rb
CHANGED
data/spec/lib/crawler_spec.rb
CHANGED
@@ -44,4 +44,44 @@ describe Crawler do
|
|
44
44
|
it { should be_nil }
|
45
45
|
end
|
46
46
|
end
|
47
|
+
|
48
|
+
describe 'Custom Crawler' do
|
49
|
+
let(:custom_crawler) { Crawler.new(:custom, "Custom/1.0") }
|
50
|
+
before { Crawler::CUSTOM << custom_crawler }
|
51
|
+
context '.matches_any' do
|
52
|
+
subject { Crawler.matches_any?(user_agent) }
|
53
|
+
context 'When the provided string matches the custom crawler' do
|
54
|
+
let(:user_agent) { "Custom/1.0" }
|
55
|
+
it { should be_true }
|
56
|
+
end
|
57
|
+
|
58
|
+
context 'When the provided string does not match the custom crawler' do
|
59
|
+
it { should be_false }
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
context '.which_crawler' do
|
64
|
+
subject { Crawler.which_crawler(user_agent) }
|
65
|
+
context 'When the provided string matches the custom crawler' do
|
66
|
+
let(:user_agent) { "Custom/1.0" }
|
67
|
+
it { should be custom_crawler.name }
|
68
|
+
end
|
69
|
+
|
70
|
+
context 'When the provided string does not match the custom crawler' do
|
71
|
+
it { should_not be custom_crawler.name }
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context '#matches?' do
|
76
|
+
subject { custom_crawler.matches?(user_agent) }
|
77
|
+
context 'When the provided string matches the custom crawler' do
|
78
|
+
let(:user_agent) { "Custom/1.0" }
|
79
|
+
it { should be_true }
|
80
|
+
end
|
81
|
+
|
82
|
+
context 'When the provided string does not match the custom crawler' do
|
83
|
+
it { should be_false }
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
47
87
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: is_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -76,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
76
|
version: '0'
|
77
77
|
segments:
|
78
78
|
- 0
|
79
|
-
hash:
|
79
|
+
hash: 4337176170739540644
|
80
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
@@ -85,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
85
|
version: '0'
|
86
86
|
segments:
|
87
87
|
- 0
|
88
|
-
hash:
|
88
|
+
hash: 4337176170739540644
|
89
89
|
requirements: []
|
90
90
|
rubyforge_project:
|
91
91
|
rubygems_version: 1.8.24
|