is_crawler 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
- # IsCrawler
2
- [![Gem Version](https://badge.fury.io/rb/is_crawler.png)](http://badge.fury.io/rb/is_crawler) [![Code Climate](https://codeclimate.com/github/ccashwell/is_crawler.png)](https://codeclimate.com/github/ccashwell/is_crawler) [![Build Status](https://travis-ci.org/ccashwell/is_crawler.png?branch=master)](https://travis-ci.org/ccashwell/is_crawler)
1
+ # is_crawler [![Gem Version](https://badge.fury.io/rb/is_crawler.png)](http://badge.fury.io/rb/is_crawler) [![Build Status](https://travis-ci.org/ccashwell/is_crawler.png?branch=master)](https://travis-ci.org/ccashwell/is_crawler) [![Code Climate](https://codeclimate.com/github/ccashwell/is_crawler.png)](https://codeclimate.com/github/ccashwell/is_crawler)
3
2
 
4
- is\_crawler does exactly what you might think it does: determine if the supplied string matches a known crawler or bot.
3
+ This gem does one thing: determine if the supplied string matches a known crawler or bot. It matches against a very short list of strings found in the user agents of crawlers that account for over 95% of crawler traffic. IMO, if it ain't detected, it ain't important.
5
4
 
6
5
  ## Installation
7
6
 
@@ -24,7 +23,7 @@ You can use the `is_crawler?` method with just a user agent string to determine
24
23
  class MyController < ActionController::Base
25
24
  include IsCrawler
26
25
  def index
27
- if is_any_crawler? request.env["HTTP_USER_AGENT"]
26
+ if is_crawler? request.env["HTTP_USER_AGENT"]
28
27
  render 'special_crawler_index'
29
28
  else
30
29
  render 'normal_boring_index'
@@ -38,7 +37,7 @@ You can use the `is_crawler?` method with just a user agent string to determine
38
37
 
39
38
  You can also define custom crawlers like this:
40
39
 
41
- Crawler::CUSTOM << Crawler.new(:custom_crawler_name, "string that is always present in crawler requests")
40
+ Crawler::CUSTOM << Crawler.new(:custom_crawler_name, "string that is always present in the crawler's user agent")
42
41
 
43
42
  That's it!
44
43
 
@@ -1,34 +1,22 @@
1
- addthis:
2
- ua_string: AddThis.com
3
- alexa:
4
- ua_string: ia_archiver
5
- archive_org:
6
- ua_string: archive.org_bot
7
- bing:
8
- ua_string: bingbot
9
- bitly:
10
- ua_string: bitlybot
11
- exabot:
12
- ua_string: Exabot
13
- facebook:
14
- ua_string: facebookexternalhit
15
- flipboard:
16
- ua_string: FlipboardProxy
17
- google:
18
- ua_string: Googlebot
19
- google_web_preview:
20
- ua_string: Google Web Preview
21
- msn:
22
- ua_string: MSNBot
23
- openwebspider:
24
- ua_string: OpenWebSpider
25
- technorati:
26
- ua_string: Technoratibot
27
- twitter:
28
- ua_string: Twitterbot
29
- yahoo:
30
- ua_string: Yahoo! Slurp
31
- yahoo_jp:
32
- ua_string: Y!J
33
- yandex:
34
- ua_string: Yandex
1
+ crawlers:
2
+ addthis: AddThis.com
3
+ alexa: ia_archiver
4
+ archive_org: archive.org_bot
5
+ ask: Ask Jeeves
6
+ baidu: baidu
7
+ bing: bingbot
8
+ bitly: bitlybot
9
+ blekko: Blekkobot
10
+ exabot: Exabot
11
+ facebook: facebookexternalhit
12
+ flipboard: FlipboardProxy
13
+ google: Googlebot
14
+ google_web_preview: Google Web Preview
15
+ msn: MSNBot
16
+ mywebsearch: MyWebSearch
17
+ openwebspider: OpenWebSpider
18
+ technorati: Technoratibot
19
+ twitter: Twitterbot
20
+ yahoo: Yahoo! Slurp
21
+ yahoo_jp: Y!J
22
+ yandex: Yandex
data/lib/crawler.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  require 'yaml'
2
2
 
3
3
  class Crawler < Struct.new(:name, :ua_string)
4
- DEFAULT = YAML.load(File.read(File.expand_path('../config/crawlers.yml', __FILE__))).collect do |k,v|
5
- Crawler.new(k.to_sym, v["ua_string"])
4
+ DEFAULT = YAML.load(File.read(File.expand_path('../config/crawlers.yml', __FILE__)))["crawlers"].collect do |k,v|
5
+ Crawler.new(k.to_sym, v)
6
6
  end
7
7
 
8
8
  CUSTOM = []
@@ -17,7 +17,7 @@ class Crawler < Struct.new(:name, :ua_string)
17
17
  end
18
18
 
19
19
  def which_crawler user_agent
20
- all.detect {|crawler| crawler.matches? user_agent }.name rescue nil
20
+ all.detect { |crawler| crawler.matches? user_agent }.name rescue nil
21
21
  end
22
22
  end
23
23
 
@@ -1,3 +1,3 @@
1
1
  module IsCrawler
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
data/lib/is_crawler.rb CHANGED
@@ -6,7 +6,7 @@ module IsCrawler
6
6
  if specific_crawlers && specific_crawlers.size > 0
7
7
  specific_crawlers.include?(Crawler.which_crawler(requesting_user_agent))
8
8
  else
9
- Crawler.matches_any?(requesting_user_agent) unless specific_crawlers.size > 0
9
+ Crawler.matches_any?(requesting_user_agent)
10
10
  end
11
11
  end
12
12
  end
@@ -1,62 +1,69 @@
1
1
  describe Crawler do
2
- let(:user_agent) { "Commodo Vestibulum/1.0" }
2
+ let(:chrome_user_agent) { 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17' }
3
+ let(:google_user_agent) { 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' }
4
+ let(:facebook_user_agent) { 'facebookexternalhit/1.0 (http://www.facebook.com/externalhit_uatext.php)' }
3
5
 
4
6
  describe '.matches_any?' do
5
7
  subject { Crawler.matches_any?(user_agent) }
6
8
 
7
9
  context 'When an unknown user agent is encountered' do
10
+ let(:user_agent) { chrome_user_agent }
8
11
  it { should be_false }
9
12
  end
10
13
 
11
14
  context 'When a known user agent is encountered' do
12
- Crawler.all.each do |crawler|
13
- context "such as the #{crawler.name.to_s} bot" do
14
- let(:user_agent) { "#{crawler.ua_string}" }
15
- it { should be_true }
16
- end
15
+ context 'such as the facebook crawler' do
16
+ let(:user_agent) { facebook_user_agent }
17
+ it { should be_true }
18
+ end
19
+
20
+ context 'such as the Googlebot' do
21
+ let(:user_agent) { google_user_agent }
22
+ it { should be_true }
17
23
  end
18
24
  end
19
25
  end
20
26
 
21
27
  describe '#matches?' do
22
- Crawler.all.each do |crawler|
23
- describe "Comparing #{crawler.name.to_s}'s known UA string" do
24
- subject { crawler.matches?(user_agent) }
25
- context "with a string containing '#{crawler.ua_string}'" do
26
- let(:user_agent) { "Mozilla/5.0 #{crawler.ua_string}/1.1 (KHTML, like Gecko)" }
27
- it { should be_true }
28
- end
28
+ describe 'Comparing Googlebot\'s known UA string' do
29
+ subject { Crawler.new(:google, 'Googlebot').matches?(user_agent) }
30
+ context "with a matching string" do
31
+ let(:user_agent) { google_user_agent }
32
+ it { should be_true }
33
+ end
29
34
 
30
- context 'with a non-matching string' do
31
- it { should be_false }
32
- end
35
+ context 'with a non-matching string' do
36
+ let(:user_agent) { chrome_user_agent }
37
+ it { should be_false }
33
38
  end
34
39
  end
35
40
  end
36
41
 
37
- describe '#which_crawler' do
42
+ describe '.which_crawler' do
38
43
  subject { Crawler.which_crawler(user_agent) }
39
44
  context 'When the provided string matches a crawler' do
40
- let(:user_agent) { "facebookexternalhit/1.1" }
45
+ let(:user_agent) { facebook_user_agent }
41
46
  it { should be :facebook }
42
47
  end
43
48
 
44
49
  context 'When the provided string matches no crawlers' do
50
+ let(:user_agent) { chrome_user_agent }
45
51
  it { should be_nil }
46
52
  end
47
53
  end
48
54
 
49
55
  describe 'Custom Crawler' do
50
56
  let(:custom_crawler) { Crawler.new(:custom, "Custom/1.0") }
57
+ let(:user_agent) { custom_crawler.ua_string }
51
58
  before { Crawler::CUSTOM << custom_crawler }
52
59
  context '.matches_any' do
53
60
  subject { Crawler.matches_any?(user_agent) }
54
61
  context 'When the provided string matches the custom crawler' do
55
- let(:user_agent) { "Custom/1.0" }
56
62
  it { should be_true }
57
63
  end
58
64
 
59
65
  context 'When the provided string does not match the custom crawler' do
66
+ let(:user_agent) { chrome_user_agent }
60
67
  it { should be_false }
61
68
  end
62
69
  end
@@ -64,11 +71,11 @@ describe Crawler do
64
71
  context '.which_crawler' do
65
72
  subject { Crawler.which_crawler(user_agent) }
66
73
  context 'When the provided string matches the custom crawler' do
67
- let(:user_agent) { "Custom/1.0" }
68
74
  it { should be custom_crawler.name }
69
75
  end
70
76
 
71
77
  context 'When the provided string does not match the custom crawler' do
78
+ let(:user_agent) { chrome_user_agent }
72
79
  it { should_not be custom_crawler.name }
73
80
  end
74
81
  end
@@ -76,11 +83,11 @@ describe Crawler do
76
83
  context '#matches?' do
77
84
  subject { custom_crawler.matches?(user_agent) }
78
85
  context 'When the provided string matches the custom crawler' do
79
- let(:user_agent) { "Custom/1.0" }
80
86
  it { should be_true }
81
87
  end
82
88
 
83
89
  context 'When the provided string does not match the custom crawler' do
90
+ let(:user_agent) { chrome_user_agent }
84
91
  it { should be_false }
85
92
  end
86
93
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: is_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-03 00:00:00.000000000 Z
12
+ date: 2013-03-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -77,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
77
77
  version: '0'
78
78
  segments:
79
79
  - 0
80
- hash: -3983243957140806942
80
+ hash: -1826155163722186205
81
81
  required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  none: false
83
83
  requirements:
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
86
  version: '0'
87
87
  segments:
88
88
  - 0
89
- hash: -3983243957140806942
89
+ hash: -1826155163722186205
90
90
  requirements: []
91
91
  rubyforge_project:
92
92
  rubygems_version: 1.8.24