rack-evil_robot 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ pkg
data/README.rdoc ADDED
@@ -0,0 +1,31 @@
1
+ = Rack::EvilRobot
2
+
3
+ == INSTALL
4
+
5
+ sudo gem install rack-evil_robot
6
+
7
+ == SETUP
8
+
9
+ require 'rack/evil_robot'
10
+ use Rack::EvilRobot
11
+
12
+ or
13
+
14
+ use Rack::EvilRobot, :redirect_path => "http://www.whatever-you-want.com"
15
+
16
+ == SET THE TRAP
17
+
18
+ Update your robots.txt to contain these lines
19
+
20
+ User-agent: *
21
+ Disallow: /honey_pot/index.html
22
+
23
+ Include this link tag on your page(s)
24
+ <a href="/honey_pot/index.html"><img src="images/pixel.gif" border="0" alt=" " width="1" height="1"></a>
25
+
26
+ After you have had this set up for a few days, check your access logs and look at the user agents that have been accessing /honey_pot/index.html.
27
+ Hopefully there is nothing there, but if there is, and they are misbehaving, add them to the regex defined in the 'evil_robots' method.
28
+ This will prevent this user agent from accessing any pages on your site anymore.
29
+
30
+ == Win
31
+ Now you don't have to worry about being crawled by bots you don't want hitting your site (mp3 bots, torrent bots, bots that are slamming your servers, etc.).
data/Rakefile ADDED
@@ -0,0 +1,44 @@
1
# Build, test and documentation tasks for the rack-evil_robot gem.
require 'rubygems'
require 'rake'

# Gem packaging tasks. A missing Jeweler is reported but does not stop the
# remaining tasks in this file from being defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "rack-evil_robot"
    gem.summary = "Detect robots that are ignoring your robots.txt file and give them the middle finger"
    gem.description = "Detect robots that are ignoring your robots.txt file and give them the middle finger"
    gem.email = "shanewolf@gmail.com"
    gem.homepage = "http://github.com/gizm0duck/rack-evil_robot"
    gem.authors = ["Shane Wolf"]
    gem.add_dependency "rack"
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# `rake spec` runs every *_spec.rb file under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# Only depend on :check_dependencies when something above actually defined it
# (presumably Jeweler::Tasks -- confirm). Declaring the prerequisite
# unconditionally makes `rake spec` abort on an unknown task whenever the
# Jeweler block was skipped by the LoadError rescue.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# `rake rdoc` builds API documentation from the README and lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip removes the trailing newline of the VERSION file so it does not end
  # up inside the documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rack-evil_robot #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,34 @@
1
module Rack
  # Rack middleware that turns away blacklisted crawlers and serves a
  # honey-pot page.
  #
  # Requests whose User-Agent matches the evil-robots pattern receive a 301
  # redirect. Other requests whose path contains "honey_pot" receive a small
  # static page, so the user agents that follow the trap link show up in the
  # access log. Everything else is passed to the wrapped application.
  #
  # Options:
  #   :redirect_path - URL blacklisted robots are redirected to
  #                    (default: DEFAULT_REDIRECT_PATH).
  #   :evil_robots   - a Regexp, a String, or an Array of either, describing
  #                    the User-Agents to block (default: DEFAULT_EVIL_ROBOTS).
  #                    Strings are matched literally and case-sensitively.
  class EvilRobot
    # Redirect target used when no :redirect_path option is given.
    DEFAULT_REDIRECT_PATH = 'http://www.example.com/'

    # Example blacklist used when no :evil_robots option is given.
    DEFAULT_EVIL_ROBOTS = /badBot|reallyBadBot/i

    # Any request path containing this is answered by the honey pot.
    HONEY_POT_PATH = /honey_pot/

    # app     - the next Rack application in the stack.
    # options - Hash of options, see the class documentation.
    def initialize(app, options = {})
      @app = app
      @redirect_path = options[:redirect_path] || DEFAULT_REDIRECT_PATH
      custom_pattern = options[:evil_robots]
      # Regexp.union accepts a mixed list of Regexps and Strings; Array()
      # lets callers pass a single pattern without wrapping it themselves.
      @evil_robots = custom_pattern ? Regexp.union(Array(custom_pattern)) : DEFAULT_EVIL_ROBOTS
    end

    # Rack entry point. Returns the usual [status, headers, body] triplet.
    def call(env)
      return goodbye if evil_robot?(env)
      return honey_pot if env['PATH_INFO'].to_s =~ HONEY_POT_PATH

      @app.call(env)
    end

    private

    # True when the request carries a User-Agent that matches the blacklist.
    # A request without a User-Agent header is never treated as a robot.
    def evil_robot?(env)
      user_agent = env['HTTP_USER_AGENT']
      !!(user_agent && user_agent =~ evil_robots)
    end

    # Response sent to blacklisted robots: a permanent redirect elsewhere.
    # The Content-Type header is included because the response has a body.
    def goodbye
      [301, {'Location' => @redirect_path, 'Content-Type' => 'text/plain'}, ['No thank you.']]
    end

    # Response served for the honey-pot path.
    def honey_pot
      [200, {'Content-Type' => 'text/html'}, ['Mmmm Honey...']]
    end

    # Pattern of blacklisted User-Agents. Kept as a method so existing code
    # that overrides it keeps working; prefer the :evil_robots option.
    def evil_robots
      @evil_robots
    end
  end
end
@@ -0,0 +1,49 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-
#
# NOTE(review): this copy was stale relative to the Rakefile. The gem name
# below now matches the "rack-evil_robot" name set in Jeweler::Tasks, and the
# README entries use the README.rdoc file name. Regenerate it with the
# gemspec command rather than maintaining it by hand.

Gem::Specification.new do |s|
  s.name = %q{rack-evil_robot}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Shane Wolf"]
  s.date = %q{2010-08-06}
  s.description = %q{Detect robots that are ignoring your robots.txt file and give them the middle finger}
  s.email = %q{shanewolf@gmail.com}
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  s.files = [
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/rack/evil_robot.rb",
    "spec/rack/evil_robot_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/gizm0duck/rack-evil_robot}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Detect robots that are ignoring your robots.txt file and give them the middle finger}
  s.test_files = [
    "spec/rack/evil_robot_spec.rb",
    "spec/spec_helper.rb"
  ]

  # Declare the rack dependency with whichever API this RubyGems offers.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<rack>, [">= 0"])
    else
      s.add_dependency(%q<rack>, [">= 0"])
    end
  else
    s.add_dependency(%q<rack>, [">= 0"])
  end
end
49
+
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
# Examples for Rack::EvilRobot#call: blacklisted user agents are redirected,
# honey-pot requests get the trap page, and everything else reaches the app.
describe Rack::EvilRobot do

  describe "call" do
    attr_reader :app

    before do
      # Minimal downstream application the middleware wraps.
      @app = lambda { |env| [200, {'Content-Type' => 'text/plain'}, ['This is my body']] }
    end

    # Builds a mock Rack environment for +path+ as requested by +user_agent+.
    def env_with_agent(path, user_agent)
      Rack::MockRequest.env_for(path, {'HTTP_USER_AGENT' => user_agent})
    end

    context "when the user agent is in the evil robots list" do
      describe "when passing in custom redirect_path" do
        it "redirects to the custom path" do
          middleware = Rack::EvilRobot.new(app, {:redirect_path => "http://www.google.com"})
          status, headers, body = middleware.call(env_with_agent('/real_path', 'badBot'))

          status.should == 301
          headers["Location"].should == "http://www.google.com"
          body.should == ["No thank you."]
        end
      end

      describe "when no custom options are passed in" do
        it "redirects the bot to example.com" do
          middleware = Rack::EvilRobot.new(app)
          status, headers, body = middleware.call(env_with_agent('/real_path', 'badBot'))

          status.should == 301
          headers["Location"].should == "http://www.example.com/"
          body.should == ["No thank you."]
        end
      end
    end

    context "when the user agent is not in the evil robots list" do
      context "when the request hits the honey pot" do
        it "tells the robot it's been busted" do
          env = env_with_agent('/honey_pot/sticky.html', 'Mozilla/5.0')
          result = Rack::EvilRobot.new(app).call(env)

          result.first.should == 200
          result.last.should == ["Mmmm Honey..."]
        end
      end

      context "when the request does not hit the honey pot" do
        it "does nothing" do
          env = env_with_agent('/real_path', 'Mozilla/5.0')
          result = Rack::EvilRobot.new(app).call(env)

          result.last.should == ['This is my body']
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'rack/evil_robot'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+ require 'rack'
7
+
8
+
9
+ Spec::Runner.configure do |config|
10
+
11
+ end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rack-evil_robot
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Shane Wolf
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-08-06 00:00:00 -07:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rack
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Detect robots that are ignoring your robots.txt file and give them the middle finger
36
+ email: shanewolf@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README.rdoc
43
+ files:
44
+ - .gitignore
45
+ - README.rdoc
46
+ - Rakefile
47
+ - VERSION
48
+ - lib/rack/evil_robot.rb
49
+ - rake-evil_robot.gemspec
50
+ - spec/rack/evil_robot_spec.rb
51
+ - spec/spec_helper.rb
52
+ has_rdoc: true
53
+ homepage: http://github.com/gizm0duck/rack-evil_robot
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options:
58
+ - --charset=UTF-8
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.3.7
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: Detect robots that are ignoring your robots.txt file and give them the middle finger
86
+ test_files:
87
+ - spec/rack/evil_robot_spec.rb
88
+ - spec/spec_helper.rb