is_crawler 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +8 -2
- data/lib/crawler.rb +23 -19
- data/lib/is_crawler/version.rb +1 -1
- data/spec/lib/crawler_spec.rb +40 -0
- metadata +3 -3
data/README.md
CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
|
|
19
19
|
|
20
20
|
## Usage
|
21
21
|
|
22
|
-
You can use the `is_crawler?` method with just a user agent string to determine if the that string matches *any*
|
22
|
+
You can use the `is_crawler?` method with just a user agent string to determine if that string matches *any* known crawler, like so:
|
23
23
|
|
24
24
|
class MyController < ActionController::Base
|
25
25
|
include IsCrawler
|
@@ -32,7 +32,13 @@ You can use the `is_crawler?` method with just a user agent string to determine
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
...or provide one or more crawlers `is_crawler?("Some User Agent/1.0", :facebook, :google)`
|
35
|
+
...or provide one or more crawlers to find out if the string matches *specific* crawlers: `is_crawler?("Some User Agent/1.0", :facebook, :google)`
|
36
|
+
|
37
|
+
You can also define custom crawlers like this:
|
38
|
+
|
39
|
+
Crawler::CUSTOM << Crawler.new(:custom_crawler_name, "string that is always present in crawler requests")
|
40
|
+
|
41
|
+
That's it!
|
36
42
|
|
37
43
|
## Contributing
|
38
44
|
|
data/lib/crawler.rb
CHANGED
@@ -1,25 +1,29 @@
|
|
1
1
|
class Crawler < Struct.new(:name, :ua_string)
|
2
|
+
DEFAULT = [
|
3
|
+
Crawler.new(:addthis, "AddThis.com"),
|
4
|
+
Crawler.new(:alexa, "ia_archiver"),
|
5
|
+
Crawler.new(:archive_org, "archive.org_bot"),
|
6
|
+
Crawler.new(:bing, "bingbot"),
|
7
|
+
Crawler.new(:bitly, "bitlybot"),
|
8
|
+
Crawler.new(:exabot, "Exabot"),
|
9
|
+
Crawler.new(:facebook, "facebookexternalhit"),
|
10
|
+
Crawler.new(:flipboard, "FlipboardProxy"),
|
11
|
+
Crawler.new(:google, "Googlebot"),
|
12
|
+
Crawler.new(:google_web_preview, "Google Web Preview"),
|
13
|
+
Crawler.new(:msn, "MSNBot"),
|
14
|
+
Crawler.new(:openwebspider, "OpenWebSpider"),
|
15
|
+
Crawler.new(:technorati, "Technoratibot"),
|
16
|
+
Crawler.new(:twitter, "Twitterbot"),
|
17
|
+
Crawler.new(:yahoo, "Yahoo! Slurp"),
|
18
|
+
Crawler.new(:yahoo_jp, "Y!J"),
|
19
|
+
Crawler.new(:yandex, "Yandex")
|
20
|
+
].freeze
|
21
|
+
|
22
|
+
CUSTOM = []
|
23
|
+
|
2
24
|
class << self
|
3
25
|
def all
|
4
|
-
|
5
|
-
Crawler.new(:addthis, "AddThis.com"),
|
6
|
-
Crawler.new(:alexa, "ia_archiver"),
|
7
|
-
Crawler.new(:archive_org, "archive.org_bot"),
|
8
|
-
Crawler.new(:bing, "bingbot"),
|
9
|
-
Crawler.new(:bitly, "bitlybot"),
|
10
|
-
Crawler.new(:exabot, "Exabot"),
|
11
|
-
Crawler.new(:facebook, "facebookexternalhit"),
|
12
|
-
Crawler.new(:flipboard, "FlipboardProxy"),
|
13
|
-
Crawler.new(:google, "Googlebot"),
|
14
|
-
Crawler.new(:google_web_preview, "Google Web Preview"),
|
15
|
-
Crawler.new(:msn, "MSNBot"),
|
16
|
-
Crawler.new(:openwebspider, "OpenWebSpider"),
|
17
|
-
Crawler.new(:technorati, "Technoratibot"),
|
18
|
-
Crawler.new(:twitter, "Twitterbot"),
|
19
|
-
Crawler.new(:yahoo, "Yahoo! Slurp"),
|
20
|
-
Crawler.new(:yahoo_jp, "Y!J"),
|
21
|
-
Crawler.new(:yandex, "Yandex")
|
22
|
-
]
|
26
|
+
DEFAULT + CUSTOM
|
23
27
|
end
|
24
28
|
|
25
29
|
def matches_any? user_agent
|
data/lib/is_crawler/version.rb
CHANGED
data/spec/lib/crawler_spec.rb
CHANGED
@@ -44,4 +44,44 @@ describe Crawler do
|
|
44
44
|
it { should be_nil }
|
45
45
|
end
|
46
46
|
end
|
47
|
+
|
48
|
+
describe 'Custom Crawler' do
|
49
|
+
let(:custom_crawler) { Crawler.new(:custom, "Custom/1.0") }
|
50
|
+
before { Crawler::CUSTOM << custom_crawler }
|
51
|
+
context '.matches_any' do
|
52
|
+
subject { Crawler.matches_any?(user_agent) }
|
53
|
+
context 'When the provided string matches the custom crawler' do
|
54
|
+
let(:user_agent) { "Custom/1.0" }
|
55
|
+
it { should be_true }
|
56
|
+
end
|
57
|
+
|
58
|
+
context 'When the provided string does not match the custom crawler' do
|
59
|
+
it { should be_false }
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
context '.which_crawler' do
|
64
|
+
subject { Crawler.which_crawler(user_agent) }
|
65
|
+
context 'When the provided string matches the custom crawler' do
|
66
|
+
let(:user_agent) { "Custom/1.0" }
|
67
|
+
it { should be custom_crawler.name }
|
68
|
+
end
|
69
|
+
|
70
|
+
context 'When the provided string does not match the custom crawler' do
|
71
|
+
it { should_not be custom_crawler.name }
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context '#matches?' do
|
76
|
+
subject { custom_crawler.matches?(user_agent) }
|
77
|
+
context 'When the provided string matches the custom crawler' do
|
78
|
+
let(:user_agent) { "Custom/1.0" }
|
79
|
+
it { should be_true }
|
80
|
+
end
|
81
|
+
|
82
|
+
context 'When the provided string does not match the custom crawler' do
|
83
|
+
it { should be_false }
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
47
87
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: is_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -76,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
76
|
version: '0'
|
77
77
|
segments:
|
78
78
|
- 0
|
79
|
-
hash:
|
79
|
+
hash: 4337176170739540644
|
80
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
@@ -85,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
85
|
version: '0'
|
86
86
|
segments:
|
87
87
|
- 0
|
88
|
-
hash:
|
88
|
+
hash: 4337176170739540644
|
89
89
|
requirements: []
|
90
90
|
rubyforge_project:
|
91
91
|
rubygems_version: 1.8.24
|