is_crawler 0.0.1

data/.gitignore ADDED
@@ -0,0 +1,17 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in is_crawler.gemspec
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2013 Chris Cashwell
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,46 @@
+ # IsCrawler
+
+ is\_crawler does exactly what you might think it does: determine if the current request is coming from a crawler or bot.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'is_crawler'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install is_crawler
+
+ ## Usage
+
+ You can use the `is_any_crawler?` method to determine if the given string, as you might have guessed, matches *any* (noteworthy) crawler, like so:
+
+     class MyController < ActionController::Base
+       include IsCrawler
+       def index
+         if is_any_crawler? request.env["HTTP_USER_AGENT"]
+           render 'special_crawler_index'
+         else
+           render 'normal_boring_index'
+         end
+       end
+     end
+
+ ...or the `is_crawler?("Some User Agent/1.0", :facebook, :google)` method to determine if you're dealing with one or more specific crawlers (see the example sketch after this file). That's it!
+
+ ## Contributing
+
+ Missing out on a noteworthy crawler? Find a problem? Ideas for improvement?
+
+ Raise an issue, or:
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
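
The README demonstrates `is_any_crawler?` but not the crawler-specific form. The sketch below is editorial, not part of the gem source: the controller, action, and template names are hypothetical; only `is_crawler?` and the `:facebook`/`:twitter` symbols come from the gem itself.

    # Hedged sketch: PreviewsController, #show and the template names are
    # made up for illustration; only is_crawler? and the crawler symbols
    # are provided by the gem.
    class PreviewsController < ActionController::Base
      include IsCrawler

      def show
        if is_crawler?(request.env["HTTP_USER_AGENT"], :facebook, :twitter)
          render 'social_preview'   # markup tailored to these two bots only
        else
          render 'show'
        end
      end
    end
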
data/Rakefile ADDED
@@ -0,0 +1 @@
+ require "bundler/gem_tasks"
data/is_crawler.gemspec ADDED
@@ -0,0 +1,21 @@
+ # -*- encoding: utf-8 -*-
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'is_crawler'
+
+ Gem::Specification.new do |gem|
+   gem.name = "is_crawler"
+   gem.version = IsCrawler::VERSION
+   gem.authors = ["Chris Cashwell"]
+   gem.email = ["ccashwell@gmail.com"]
+   gem.description = %q{is_crawler does just what you might expect: detect whether the current request is from a crawler.}
+   gem.summary = %q{Simple, effective crawler and bot detection.}
+   gem.homepage = "http://github.com/ccashwell/is_crawler"
+
+   gem.files = `git ls-files`.split($/)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.add_development_dependency "rspec"
+ end
data/lib/crawler.rb ADDED
@@ -0,0 +1,18 @@
+ class Crawler < Struct.new(:name, :ua_string)
+   BING = Crawler.new(:bing, "bingbot/2.0")
+   FACEBOOK = Crawler.new(:facebook, "facebookexternalhit/1.1")
+   GOOGLE = Crawler.new(:google, "Googlebot/2.1")
+   MSN = Crawler.new(:msn, "MSNBot")
+   TWITTER = Crawler.new(:twitter, "Twitterbot")
+   YAHOO = Crawler.new(:yahoo, "Yahoo! Slurp")
+
+   ALL = [BING, FACEBOOK, GOOGLE, MSN, TWITTER, YAHOO]
+
+   def self.matches_any? user_agent
+     ALL.detect { |crawler| crawler.matches? user_agent } != nil
+   end
+
+   def matches? user_agent
+     user_agent.downcase.include? ua_string.downcase
+   end
+ end
data/lib/is_crawler/version.rb ADDED
@@ -0,0 +1,3 @@
+ module IsCrawler
+   VERSION = "0.0.1"
+ end
data/lib/is_crawler.rb ADDED
@@ -0,0 +1,17 @@
+ require 'is_crawler/version'
+ require 'crawler'
+
+ module IsCrawler
+   def is_any_crawler? requesting_user_agent
+     Crawler.matches_any? requesting_user_agent
+   end
+
+   def is_crawler? requesting_user_agent, *specific_crawlers
+     crawler = which_crawler(requesting_user_agent)
+     crawler && specific_crawlers.include?(crawler.name) ? true : false
+   end
+
+   def which_crawler requesting_user_agent
+     Crawler::ALL.detect { |crawler| crawler.matches? requesting_user_agent }
+   end
+ end
data/spec/is_crawler_spec.rb ADDED
@@ -0,0 +1,12 @@
+ describe IsCrawler do
+   describe '#is_any_crawler?' do
+     let(:user_agent) { "Commodo Vestibulum/1.0" }
+     subject { Test.new.is_any_crawler?(user_agent) }
+     it 'defers to Crawler.matches_any?' do
+       Crawler.should_receive(:matches_any?).with(user_agent)
+       subject
+     end
+   end
+ end
+
+ class Test; include IsCrawler; end
data/spec/lib/crawler_spec.rb ADDED
@@ -0,0 +1,33 @@
+ describe Crawler do
+   describe '.matches_any?' do
+     let(:user_agent) { "Commodo Vestibulum/1.0" }
+     subject { Crawler.matches_any?(user_agent) }
+
+     context 'When an unknown user agent is encountered' do
+       it { should be_false }
+     end
+
+     context 'When a known user agent is encountered' do
+       Crawler::ALL.each do |crawler|
+         context "such as the #{crawler.name} bot" do
+           let(:user_agent) { "#{crawler.ua_string}" }
+           it { should be_true }
+         end
+       end
+     end
+   end
+
+   describe '#matches?' do
+     Crawler::ALL.each do |crawler|
+       describe "Comparing #{crawler.name}'s known UA string" do
+         it 'with a matching string' do
+           crawler.matches?(crawler.ua_string).should == true
+         end
+
+         it 'with a non-matching string' do
+           crawler.matches?('Commodo Vestibulum/1.0').should == false
+         end
+       end
+     end
+   end
+ end
metadata ADDED
@@ -0,0 +1,75 @@
+ --- !ruby/object:Gem::Specification
+ name: is_crawler
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+   prerelease:
+ platform: ruby
+ authors:
+ - Chris Cashwell
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-02-27 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: ! 'is_crawler does just what you might expect: detect whether the current
+   request is from a crawler.'
+ email:
+ - ccashwell@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - is_crawler.gemspec
+ - lib/crawler.rb
+ - lib/is_crawler.rb
+ - lib/is_crawler/version.rb
+ - spec/is_crawler_spec.rb
+ - spec/lib/crawler_spec.rb
+ homepage: http://github.com/ccashwell/is_crawler
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.24
+ signing_key:
+ specification_version: 3
+ summary: Simple, effective crawler and bot detection.
+ test_files:
+ - spec/is_crawler_spec.rb
+ - spec/lib/crawler_spec.rb