is_crawler 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/Rakefile +3 -1
- data/lib/crawler.rb +4 -0
- data/lib/is_crawler/version.rb +1 -1
- data/lib/is_crawler.rb +5 -10
- data/spec/is_crawler_spec.rb +35 -6
- data/spec/lib/crawler_spec.rb +19 -5
- metadata +4 -4
data/README.md
CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
|
|
19
19
|
|
20
20
|
## Usage
|
21
21
|
|
22
|
-
You can use the `
|
22
|
+
You can use the `is_crawler?` method with just a user agent string to determine if that string matches *any* (noteworthy) crawler, like so:
|
23
23
|
|
24
24
|
class MyController < ActionController::Base
|
25
25
|
include IsCrawler
|
@@ -32,7 +32,7 @@ You can use the `is_any_crawler?` method to determine if the given string, as yo
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
...or
|
35
|
+
...or provide one or more crawlers, e.g. `is_crawler?("Some User Agent/1.0", :facebook, :google)`, to find out whether the string matches one of those specific crawlers. That's it!
|
36
36
|
|
37
37
|
## Contributing
|
38
38
|
|
data/Rakefile
CHANGED
data/lib/crawler.rb
CHANGED
@@ -25,6 +25,10 @@ class Crawler < Struct.new(:name, :ua_string)
|
|
25
25
|
def matches_any? user_agent
|
26
26
|
all.detect { |crawler| crawler.matches? user_agent } != nil
|
27
27
|
end
|
28
|
+
|
29
|
+
def which_crawler user_agent
|
30
|
+
all.detect {|crawler| crawler.matches? user_agent }.name rescue nil
|
31
|
+
end
|
28
32
|
end
|
29
33
|
|
30
34
|
def matches? user_agent
|
data/lib/is_crawler/version.rb
CHANGED
data/lib/is_crawler.rb
CHANGED
@@ -2,16 +2,11 @@ require './lib/is_crawler/version'
|
|
2
2
|
require './lib/crawler'
|
3
3
|
|
4
4
|
module IsCrawler
|
5
|
-
def is_any_crawler? requesting_user_agent
|
6
|
-
Crawler.matches_any? requesting_user_agent
|
7
|
-
end
|
8
|
-
|
9
5
|
def is_crawler? requesting_user_agent, *specific_crawlers
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
Crawler.all.detect {|crawler| crawler.matches? requesting_user_agent }
|
6
|
+
if specific_crawlers && specific_crawlers.size > 0
|
7
|
+
specific_crawlers.include?(Crawler.which_crawler(requesting_user_agent))
|
8
|
+
else
|
9
|
+
Crawler.matches_any?(requesting_user_agent) unless specific_crawlers.size > 0
|
10
|
+
end
|
16
11
|
end
|
17
12
|
end
|
data/spec/is_crawler_spec.rb
CHANGED
@@ -1,10 +1,39 @@
|
|
1
1
|
describe IsCrawler do
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
let(:user_agent) { "Commodo Vestibulum/1.0" }
|
3
|
+
|
4
|
+
describe '#is_crawler?' do
|
5
|
+
context 'When specific crawlers are provided' do
|
6
|
+
subject { Test.new.is_crawler?(user_agent, :facebook, :google) }
|
7
|
+
context 'When the provided string matches a crawler' do
|
8
|
+
context 'and it is in the specified list' do
|
9
|
+
context 'as the first element' do
|
10
|
+
let(:user_agent) { "facebookexternalhit/1.1" }
|
11
|
+
it { should be_true }
|
12
|
+
end
|
13
|
+
|
14
|
+
context 'as a subsequent element' do
|
15
|
+
let(:user_agent) { "Googlebot/1.1" }
|
16
|
+
it { should be_true }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context 'and it is not in the specified list' do
|
21
|
+
let(:user_agent) { "Twitterbot/1.1" }
|
22
|
+
it { should be_false }
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'When the provided string matches no crawlers' do
|
27
|
+
it { should be_false }
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
context 'When no specific crawlers are provided' do
|
32
|
+
subject { Test.new.is_crawler?(user_agent) }
|
33
|
+
it 'defers to Crawler#matches_any' do
|
34
|
+
Crawler.should_receive(:matches_any?).with(user_agent)
|
35
|
+
subject
|
36
|
+
end
|
8
37
|
end
|
9
38
|
end
|
10
39
|
end
|
data/spec/lib/crawler_spec.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
describe Crawler do
|
2
|
+
let(:user_agent) { "Commodo Vestibulum/1.0" }
|
2
3
|
describe '.matches_any?' do
|
3
|
-
let(:user_agent) { "Commodo Vestibulum/1.0" }
|
4
4
|
subject { Crawler.matches_any?(user_agent) }
|
5
5
|
|
6
6
|
context 'When an unknown user agent is encountered' do
|
@@ -20,14 +20,28 @@ describe Crawler do
|
|
20
20
|
describe '#matches?' do
|
21
21
|
Crawler.all.each do |crawler|
|
22
22
|
describe "Comparing #{crawler.name.to_s}'s known UA string" do
|
23
|
-
|
24
|
-
|
23
|
+
subject { crawler.matches?(user_agent) }
|
24
|
+
context "with a string containing '#{crawler.ua_string}'" do
|
25
|
+
let(:user_agent) { "Mozilla/5.0 #{crawler.ua_string}/1.1 (KHTML, like Gecko)" }
|
26
|
+
it { should be_true }
|
25
27
|
end
|
26
28
|
|
27
|
-
|
28
|
-
|
29
|
+
context 'with a non-matching string' do
|
30
|
+
it { should be_false }
|
29
31
|
end
|
30
32
|
end
|
31
33
|
end
|
32
34
|
end
|
35
|
+
|
36
|
+
describe '#which_crawler' do
|
37
|
+
subject { Crawler.which_crawler(user_agent) }
|
38
|
+
context 'When the provided string matches a crawler' do
|
39
|
+
let(:user_agent) { "facebookexternalhit/1.1" }
|
40
|
+
it { should == :facebook }
|
41
|
+
end
|
42
|
+
|
43
|
+
context 'When the provided string matches no crawlers' do
|
44
|
+
it { should be_nil }
|
45
|
+
end
|
46
|
+
end
|
33
47
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: is_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -76,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
76
|
version: '0'
|
77
77
|
segments:
|
78
78
|
- 0
|
79
|
-
hash:
|
79
|
+
hash: 737772547991189407
|
80
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
@@ -85,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
85
|
version: '0'
|
86
86
|
segments:
|
87
87
|
- 0
|
88
|
-
hash:
|
88
|
+
hash: 737772547991189407
|
89
89
|
requirements: []
|
90
90
|
rubyforge_project:
|
91
91
|
rubygems_version: 1.8.24
|