is_crawler 0.1.2 → 0.1.3

data/README.md CHANGED
@@ -1,7 +1,6 @@
- # IsCrawler
- [![Gem Version](https://badge.fury.io/rb/is_crawler.png)](http://badge.fury.io/rb/is_crawler) [![Code Climate](https://codeclimate.com/github/ccashwell/is_crawler.png)](https://codeclimate.com/github/ccashwell/is_crawler) [![Build Status](https://travis-ci.org/ccashwell/is_crawler.png?branch=master)](https://travis-ci.org/ccashwell/is_crawler)
+ # is_crawler [![Gem Version](https://badge.fury.io/rb/is_crawler.png)](http://badge.fury.io/rb/is_crawler) [![Build Status](https://travis-ci.org/ccashwell/is_crawler.png?branch=master)](https://travis-ci.org/ccashwell/is_crawler) [![Code Climate](https://codeclimate.com/github/ccashwell/is_crawler.png)](https://codeclimate.com/github/ccashwell/is_crawler)
 
- is\_crawler does exactly what you might think it does: determine if the supplied string matches a known crawler or bot.
+ This gem does one thing: determine if the supplied string matches a known crawler or bot. It matches against a very short list of strings found in the user agents that represent over 95% of crawler traffic. IMO, if it ain't detected, it ain't important.
 
  ## Installation
 
@@ -24,7 +23,7 @@ You can use the `is_crawler?` method with just a user agent string to determine
  class MyController < ActionController::Base
    include IsCrawler
    def index
-     if is_any_crawler? request.env["HTTP_USER_AGENT"]
+     if is_crawler? request.env["HTTP_USER_AGENT"]
        render 'special_crawler_index'
      else
        render 'normal_boring_index'
@@ -38,7 +37,7 @@ You can use the `is_crawler?` method with just a user agent string to determine
 
  You can also define custom crawlers like this:
 
- Crawler::CUSTOM << Crawler.new(:custom_crawler_name, "string that is always present in crawler requests")
+ Crawler::CUSTOM << Crawler.new(:custom_crawler_name, "string that is always present in the crawler's user agent")
 
  That's it!
 
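The README's net behavioral change is the helper rename: the zero-argument form documented as `is_any_crawler?` in 0.1.2 is now `is_crawler?`. A minimal sketch of the documented flow outside a controller (the `Crawler` and `IsCrawler` calls come straight from the diff; `:my_bot` and `"MyBot"` are hypothetical values for illustration):

```ruby
require 'is_crawler'
include IsCrawler

# Built-in detection, e.g. Facebook's crawler from the default list:
is_crawler? 'facebookexternalhit/1.0 (http://www.facebook.com/externalhit_uatext.php)'
# => true

# Custom crawlers are registered exactly as the README shows; the second
# argument is the substring that always appears in the bot's user agent.
Crawler::CUSTOM << Crawler.new(:my_bot, "MyBot")
is_crawler? 'Mozilla/5.0 (compatible; MyBot/2.0)'
# => true
```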
data/lib/config/crawlers.yml CHANGED
@@ -1,34 +1,22 @@
- addthis:
-   ua_string: AddThis.com
- alexa:
-   ua_string: ia_archiver
- archive_org:
-   ua_string: archive.org_bot
- bing:
-   ua_string: bingbot
- bitly:
-   ua_string: bitlybot
- exabot:
-   ua_string: Exabot
- facebook:
-   ua_string: facebookexternalhit
- flipboard:
-   ua_string: FlipboardProxy
- google:
-   ua_string: Googlebot
- google_web_preview:
-   ua_string: Google Web Preview
- msn:
-   ua_string: MSNBot
- openwebspider:
-   ua_string: OpenWebSpider
- technorati:
-   ua_string: Technoratibot
- twitter:
-   ua_string: Twitterbot
- yahoo:
-   ua_string: Yahoo! Slurp
- yahoo_jp:
-   ua_string: Y!J
- yandex:
-   ua_string: Yandex
+ crawlers:
+   addthis: AddThis.com
+   alexa: ia_archiver
+   archive_org: archive.org_bot
+   ask: Ask Jeeves
+   baidu: baidu
+   bing: bingbot
+   bitly: bitlybot
+   blekko: Blekkobot
+   exabot: Exabot
+   facebook: facebookexternalhit
+   flipboard: FlipboardProxy
+   google: Googlebot
+   google_web_preview: Google Web Preview
+   msn: MSNBot
+   mywebsearch: MyWebSearch
+   openwebspider: OpenWebSpider
+   technorati: Technoratibot
+   twitter: Twitterbot
+   yahoo: Yahoo! Slurp
+   yahoo_jp: Y!J
+   yandex: Yandex
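For orientation, a small sketch (not part of the gem) of what each shape parses to: the old file nested a `ua_string` key under every crawler name, while the new file keys everything under a single `crawlers` hash of name => UA substring, which is exactly what the `crawler.rb` change below consumes:

```ruby
require 'yaml'

old_shape = YAML.load(<<~YAML)
  google:
    ua_string: Googlebot
YAML
old_shape               # => {"google"=>{"ua_string"=>"Googlebot"}}

new_shape = YAML.load(<<~YAML)
  crawlers:
    google: Googlebot
YAML
new_shape["crawlers"]   # => {"google"=>"Googlebot"}
```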
data/lib/crawler.rb CHANGED
@@ -1,8 +1,8 @@
  require 'yaml'
 
  class Crawler < Struct.new(:name, :ua_string)
-   DEFAULT = YAML.load(File.read(File.expand_path('../config/crawlers.yml', __FILE__))).collect do |k,v|
-     Crawler.new(k.to_sym, v["ua_string"])
+   DEFAULT = YAML.load(File.read(File.expand_path('../config/crawlers.yml', __FILE__)))["crawlers"].collect do |k,v|
+     Crawler.new(k.to_sym, v)
    end
 
    CUSTOM = []
@@ -17,7 +17,7 @@ class Crawler < Struct.new(:name, :ua_string)
    end
 
    def which_crawler user_agent
-     all.detect {|crawler| crawler.matches? user_agent }.name rescue nil
+     all.detect { |crawler| crawler.matches? user_agent }.name rescue nil
    end
  end
 
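The `DEFAULT` change is the consumer side of the YAML reshaping above (index into `"crawlers"`, then each value is the UA substring itself); `which_crawler` only gained whitespace. Its behavior, sketched with UA strings that appear elsewhere in this diff:

```ruby
require 'is_crawler'

Crawler.which_crawler('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')
# => :google

# With no match, detect returns nil, calling .name on it raises
# NoMethodError, and the inline `rescue nil` turns that into nil.
Crawler.which_crawler('Commodo Vestibulum/1.0')
# => nil
```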
data/lib/is_crawler/version.rb CHANGED
@@ -1,3 +1,3 @@
  module IsCrawler
-   VERSION = "0.1.2"
+   VERSION = "0.1.3"
  end
data/lib/is_crawler.rb CHANGED
@@ -6,7 +6,7 @@ module IsCrawler
    if specific_crawlers && specific_crawlers.size > 0
      specific_crawlers.include?(Crawler.which_crawler(requesting_user_agent))
    else
-     Crawler.matches_any?(requesting_user_agent) unless specific_crawlers.size > 0
+     Crawler.matches_any?(requesting_user_agent)
    end
  end
 end
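The dropped `unless` guard was redundant: control only reaches the `else` branch when no specific crawlers were requested, so re-testing `specific_crawlers.size` added nothing (and would raise `NoMethodError` if `specific_crawlers` were ever nil). A usage sketch, assuming crawler names are passed as extra arguments, which the `specific_crawlers` handling above implies:

```ruby
require 'is_crawler'
include IsCrawler

googlebot = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'

is_crawler?(googlebot)                     # any known crawler counts => true
is_crawler?(googlebot, :facebook)          # only :facebook counts    => false
is_crawler?(googlebot, :google, :twitter)  # :google is in the list   => true
```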
data/spec/crawler_spec.rb CHANGED
@@ -1,62 +1,69 @@
  describe Crawler do
-   let(:user_agent) { "Commodo Vestibulum/1.0" }
+   let(:chrome_user_agent) { 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17' }
+   let(:google_user_agent) { 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' }
+   let(:facebook_user_agent) { 'facebookexternalhit/1.0 (http://www.facebook.com/externalhit_uatext.php)' }
 
    describe '.matches_any?' do
      subject { Crawler.matches_any?(user_agent) }
 
      context 'When an unknown user agent is encountered' do
+       let(:user_agent) { chrome_user_agent }
        it { should be_false }
      end
 
      context 'When a known user agent is encountered' do
-       Crawler.all.each do |crawler|
-         context "such as the #{crawler.name.to_s} bot" do
-           let(:user_agent) { "#{crawler.ua_string}" }
-           it { should be_true }
-         end
+       context 'such as the facebook crawler' do
+         let(:user_agent) { facebook_user_agent }
+         it { should be_true }
+       end
+
+       context 'such as the Googlebot' do
+         let(:user_agent) { google_user_agent }
+         it { should be_true }
        end
      end
    end
 
    describe '#matches?' do
-     Crawler.all.each do |crawler|
-       describe "Comparing #{crawler.name.to_s}'s known UA string" do
-         subject { crawler.matches?(user_agent) }
-         context "with a string containing '#{crawler.ua_string}'" do
-           let(:user_agent) { "Mozilla/5.0 #{crawler.ua_string}/1.1 (KHTML, like Gecko)" }
-           it { should be_true }
-         end
+     describe 'Comparing Googlebot\'s known UA string' do
+       subject { Crawler.new(:google, 'Googlebot').matches?(user_agent) }
+       context "with a matching string" do
+         let(:user_agent) { google_user_agent }
+         it { should be_true }
+       end
 
-       context 'with a non-matching string' do
-         it { should be_false }
-       end
+       context 'with a non-matching string' do
+         let(:user_agent) { chrome_user_agent }
+         it { should be_false }
        end
      end
    end
 
-   describe '#which_crawler' do
+   describe '.which_crawler' do
      subject { Crawler.which_crawler(user_agent) }
      context 'When the provided string matches a crawler' do
-       let(:user_agent) { "facebookexternalhit/1.1" }
+       let(:user_agent) { facebook_user_agent }
        it { should be :facebook }
      end
 
      context 'When the provided string matches no crawlers' do
+       let(:user_agent) { chrome_user_agent }
        it { should be_nil }
      end
    end
 
    describe 'Custom Crawler' do
      let(:custom_crawler) { Crawler.new(:custom, "Custom/1.0") }
+     let(:user_agent) { custom_crawler.ua_string }
      before { Crawler::CUSTOM << custom_crawler }
      context '.matches_any' do
        subject { Crawler.matches_any?(user_agent) }
        context 'When the provided string matches the custom crawler' do
-         let(:user_agent) { "Custom/1.0" }
          it { should be_true }
        end
 
        context 'When the provided string does not match the custom crawler' do
+         let(:user_agent) { chrome_user_agent }
          it { should be_false }
        end
      end
@@ -64,11 +71,11 @@ describe Crawler do
    context '.which_crawler' do
      subject { Crawler.which_crawler(user_agent) }
      context 'When the provided string matches the custom crawler' do
-       let(:user_agent) { "Custom/1.0" }
        it { should be custom_crawler.name }
      end
 
      context 'When the provided string does not match the custom crawler' do
+       let(:user_agent) { chrome_user_agent }
        it { should_not be custom_crawler.name }
      end
    end
@@ -76,11 +83,11 @@ describe Crawler do
    context '#matches?' do
      subject { custom_crawler.matches?(user_agent) }
      context 'When the provided string matches the custom crawler' do
-       let(:user_agent) { "Custom/1.0" }
        it { should be_true }
      end
 
      context 'When the provided string does not match the custom crawler' do
+       let(:user_agent) { chrome_user_agent }
        it { should be_false }
      end
    end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: is_crawler
  version: !ruby/object:Gem::Version
-   version: 0.1.2
+   version: 0.1.3
  prerelease:
  platform: ruby
  authors:
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-03-03 00:00:00.000000000 Z
+ date: 2013-03-04 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rake
@@ -77,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '0'
      segments:
      - 0
-     hash: -3983243957140806942
+     hash: -1826155163722186205
  required_rubygems_version: !ruby/object:Gem::Requirement
    none: false
    requirements:
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
      version: '0'
      segments:
      - 0
-     hash: -3983243957140806942
+     hash: -1826155163722186205
  requirements: []
  rubyforge_project:
  rubygems_version: 1.8.24