is_crawler 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/Rakefile +3 -1
- data/lib/crawler.rb +4 -0
- data/lib/is_crawler/version.rb +1 -1
- data/lib/is_crawler.rb +5 -10
- data/spec/is_crawler_spec.rb +35 -6
- data/spec/lib/crawler_spec.rb +19 -5
- metadata +4 -4
data/README.md
CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
|
|
19
19
|
|
20
20
|
## Usage
|
21
21
|
|
22
|
-
You can use the `
|
22
|
+
You can use the `is_crawler?` method with just a user agent string to determine if that string matches *any* (noteworthy) crawler, like so:
|
23
23
|
|
24
24
|
class MyController < ActionController::Base
|
25
25
|
include IsCrawler
|
@@ -32,7 +32,7 @@ You can use the `is_any_crawler?` method to determine if the given string, as yo
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
...or
|
35
|
+
...or provide one or more crawlers, e.g. `is_crawler?("Some User Agent/1.0", :facebook, :google)`, to find out if the string matches one of those specific crawlers. That's it!
|
36
36
|
|
37
37
|
## Contributing
|
38
38
|
|
data/Rakefile
CHANGED
data/lib/crawler.rb
CHANGED
@@ -25,6 +25,10 @@ class Crawler < Struct.new(:name, :ua_string)
|
|
25
25
|
def matches_any? user_agent
|
26
26
|
all.detect { |crawler| crawler.matches? user_agent } != nil
|
27
27
|
end
|
28
|
+
|
29
|
+
def which_crawler user_agent
|
30
|
+
all.detect {|crawler| crawler.matches? user_agent }.name rescue nil
|
31
|
+
end
|
28
32
|
end
|
29
33
|
|
30
34
|
def matches? user_agent
|
data/lib/is_crawler/version.rb
CHANGED
data/lib/is_crawler.rb
CHANGED
@@ -2,16 +2,11 @@ require './lib/is_crawler/version'
|
|
2
2
|
require './lib/crawler'
|
3
3
|
|
4
4
|
module IsCrawler
|
5
|
-
def is_any_crawler? requesting_user_agent
|
6
|
-
Crawler.matches_any? requesting_user_agent
|
7
|
-
end
|
8
|
-
|
9
5
|
def is_crawler? requesting_user_agent, *specific_crawlers
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
Crawler.all.detect {|crawler| crawler.matches? requesting_user_agent }
|
6
|
+
if specific_crawlers && specific_crawlers.size > 0
|
7
|
+
specific_crawlers.include?(Crawler.which_crawler(requesting_user_agent))
|
8
|
+
else
|
9
|
+
Crawler.matches_any?(requesting_user_agent) unless specific_crawlers.size > 0
|
10
|
+
end
|
16
11
|
end
|
17
12
|
end
|
data/spec/is_crawler_spec.rb
CHANGED
@@ -1,10 +1,39 @@
|
|
1
1
|
describe IsCrawler do
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
let(:user_agent) { "Commodo Vestibulum/1.0" }
|
3
|
+
|
4
|
+
describe '#is_crawler?' do
|
5
|
+
context 'When specific crawlers are provided' do
|
6
|
+
subject { Test.new.is_crawler?(user_agent, :facebook, :google) }
|
7
|
+
context 'When the provided string matches a crawler' do
|
8
|
+
context 'and it is in the specified list' do
|
9
|
+
context 'as the first element' do
|
10
|
+
let(:user_agent) { "facebookexternalhit/1.1" }
|
11
|
+
it { should be_true }
|
12
|
+
end
|
13
|
+
|
14
|
+
context 'as a subsequent element' do
|
15
|
+
let(:user_agent) { "Googlebot/1.1" }
|
16
|
+
it { should be_true }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context 'and it is not in the specified list' do
|
21
|
+
let(:user_agent) { "Twitterbot/1.1" }
|
22
|
+
it { should be_false }
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'When the provided string matches no crawlers' do
|
27
|
+
it { should be_false }
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
context 'When no specific crawlers are provided' do
|
32
|
+
subject { Test.new.is_crawler?(user_agent) }
|
33
|
+
it 'defers to Crawler#matches_any' do
|
34
|
+
Crawler.should_receive(:matches_any?).with(user_agent)
|
35
|
+
subject
|
36
|
+
end
|
8
37
|
end
|
9
38
|
end
|
10
39
|
end
|
data/spec/lib/crawler_spec.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
describe Crawler do
|
2
|
+
let(:user_agent) { "Commodo Vestibulum/1.0" }
|
2
3
|
describe '.matches_any?' do
|
3
|
-
let(:user_agent) { "Commodo Vestibulum/1.0" }
|
4
4
|
subject { Crawler.matches_any?(user_agent) }
|
5
5
|
|
6
6
|
context 'When an unknown user agent is encountered' do
|
@@ -20,14 +20,28 @@ describe Crawler do
|
|
20
20
|
describe '#matches?' do
|
21
21
|
Crawler.all.each do |crawler|
|
22
22
|
describe "Comparing #{crawler.name.to_s}'s known UA string" do
|
23
|
-
|
24
|
-
|
23
|
+
subject { crawler.matches?(user_agent) }
|
24
|
+
context "with a string containing '#{crawler.ua_string}'" do
|
25
|
+
let(:user_agent) { "Mozilla/5.0 #{crawler.ua_string}/1.1 (KHTML, like Gecko)" }
|
26
|
+
it { should be_true }
|
25
27
|
end
|
26
28
|
|
27
|
-
|
28
|
-
|
29
|
+
context 'with a non-matching string' do
|
30
|
+
it { should be_false }
|
29
31
|
end
|
30
32
|
end
|
31
33
|
end
|
32
34
|
end
|
35
|
+
|
36
|
+
describe '#which_crawler' do
|
37
|
+
subject { Crawler.which_crawler(user_agent) }
|
38
|
+
context 'When the provided string matches a crawler' do
|
39
|
+
let(:user_agent) { "facebookexternalhit/1.1" }
|
40
|
+
it { should == :facebook }
|
41
|
+
end
|
42
|
+
|
43
|
+
context 'When the provided string matches no crawlers' do
|
44
|
+
it { should be_nil }
|
45
|
+
end
|
46
|
+
end
|
33
47
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: is_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -76,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
76
|
version: '0'
|
77
77
|
segments:
|
78
78
|
- 0
|
79
|
-
hash:
|
79
|
+
hash: 737772547991189407
|
80
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
@@ -85,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
85
|
version: '0'
|
86
86
|
segments:
|
87
87
|
- 0
|
88
|
-
hash:
|
88
|
+
hash: 737772547991189407
|
89
89
|
requirements: []
|
90
90
|
rubyforge_project:
|
91
91
|
rubygems_version: 1.8.24
|