rack-evil_robot 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.rdoc +31 -0
- data/Rakefile +44 -0
- data/VERSION +1 -0
- data/lib/rack/evil_robot.rb +34 -0
- data/rake-evil_robot.gemspec +49 -0
- data/spec/rack/evil_robot_spec.rb +52 -0
- data/spec/spec_helper.rb +11 -0
- metadata +88 -0
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
pkg
|
data/README.rdoc
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
= Rack::EvilRobot
|
2
|
+
|
3
|
+
== INSTALL
|
4
|
+
|
5
|
+
sudo gem install rack-evil_robot
|
6
|
+
|
7
|
+
== SETUP
|
8
|
+
|
9
|
+
require 'rack/evil_robot'
|
10
|
+
use Rack::EvilRobot
|
11
|
+
|
12
|
+
or
|
13
|
+
|
14
|
+
use Rack::EvilRobot, :redirect_path => "http://www.whatever-you-want.com"
|
15
|
+
|
16
|
+
== SET THE TRAP
|
17
|
+
|
18
|
+
Update your robots.txt to contain these lines
|
19
|
+
|
20
|
+
User-agent: *
|
21
|
+
Disallow: /honey_pot/index.html
|
22
|
+
|
23
|
+
Include this link tag on your page(s)
|
24
|
+
<a href="/honey_pot/index.html"><img src="images/pixel.gif" border="0" alt=" " width="1" height="1"></a>
|
25
|
+
|
26
|
+
After this has been in place for a few days, check your access logs for the user agents that have requested /honey_pot/index.html.
|
27
|
+
Hopefully there is nothing there, but if any of those user agents are misbehaving, add them to the regex returned by the 'evil_robots' method in lib/rack/evil_robot.rb.
|
28
|
+
From then on, every request from a matching user agent is answered with a 301 redirect to the configured :redirect_path instead of reaching any page on your site.
|
29
|
+
|
30
|
+
== Win
|
31
|
+
Now you don't have to worry about getting crawled by bots you don't care to be hit by (mp3 bots, torrent bots, bots that are slamming your servers, etc.).
|
data/Rakefile
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Packaging/release tasks. Jeweler is optional: when it cannot be loaded we
# print a hint and carry on so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "rack-evil_robot"
    gem.summary = "Detect robots that are ignoring your robots.txt file and give them the middle finger"
    gem.description = "Detect robots that are ignoring your robots.txt file and give them the middle finger"
    gem.email = "shanewolf@gmail.com"
    gem.homepage = "http://github.com/gizm0duck/rack-evil_robot"
    gem.authors = ["Shane Wolf"]
    gem.add_dependency "rack"
  end
  Jeweler::GemcutterTasks.new

  # Nothing in this Rakefile defines :check_dependencies; it is presumably
  # provided by the Jeweler tasks above (TODO confirm). Declaring the
  # prerequisite only when Jeweler loaded keeps `rake spec` (and the default
  # task) from depending on an undefined task when Jeweler is missing.
  task :spec => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs every spec under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same specs, with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# API documentation built from the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Fall back to an empty version string when no VERSION file is present.
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rack-evil_robot #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Rack
  # Rack middleware that turns away requests from unwanted user agents and
  # answers honey-pot URLs itself instead of passing them to the application.
  #
  # Options (all optional):
  #   :redirect_path - value of the Location header sent to blocked agents
  #                    (default: 'http://www.example.com/').
  #   :evil_robots   - Regexp matched against the User-Agent header; agents
  #                    that match are blocked (default: /badBot|reallyBadBot/i).
  #
  # Example:
  #   use Rack::EvilRobot, :redirect_path => "http://www.example.org/",
  #                        :evil_robots   => /mp3bot|torrentbot/i
  class EvilRobot
    DEFAULT_REDIRECT_PATH = 'http://www.example.com/'.freeze

    # Placeholder blocklist -- these names are examples only.
    DEFAULT_EVIL_ROBOTS = /badBot|reallyBadBot/i

    # Any request path containing this fragment is treated as the honey pot.
    HONEY_POT = /honey_pot/

    # app     - the next Rack application in the stack.
    # options - Hash of the options described on the class.
    def initialize(app, options = {})
      @app = app
      @redirect_path = options[:redirect_path] || DEFAULT_REDIRECT_PATH
      @evil_robots = options[:evil_robots]
    end

    # Rack entry point. Returns the usual [status, headers, body] triple.
    #
    # Blocked agents are redirected before anything else is considered, so
    # they never see the honey pot or the wrapped application.
    def call(env)
      return goodbye if evil_robot?(env)
      return honey if env['PATH_INFO'] =~ HONEY_POT

      @app.call(env)
    end

    private

    # True when the request carries a User-Agent header that matches the
    # blocklist; requests without a User-Agent are never blocked.
    def evil_robot?(env)
      agent = env['HTTP_USER_AGENT']
      return false unless agent

      !(agent =~ evil_robots).nil?
    end

    # Permanent redirect handed to blocked agents. The response has a short
    # text body, so its media type is declared as well.
    def goodbye
      headers = {'Location' => @redirect_path, 'Content-Type' => 'text/plain'}
      [301, headers, ['No thank you.']]
    end

    # Response served for honey-pot paths requested by non-blocked agents.
    def honey
      [200, {'Content-Type' => 'text/html'}, ['Mmmm Honey...']]
    end

    # Pattern used to recognise unwanted agents: the :evil_robots option when
    # one was given, otherwise the example default.
    def evil_robots
      @evil_robots || DEFAULT_EVIL_ROBOTS
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for rack-evil_robot. The name and README entries below
# agree with Jeweler::Tasks in the Rakefile (gem.name = "rack-evil_robot")
# and with the README.rdoc file that is shipped in the package.
Gem::Specification.new do |s|
  s.name = %q{rack-evil_robot}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Shane Wolf"]
  s.date = %q{2010-08-06}
  s.description = %q{Detect robots that are ignoring your robots.txt file and give them the middle finger}
  s.email = %q{shanewolf@gmail.com}
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  s.files = [
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/rack/evil_robot.rb",
    "spec/rack/evil_robot_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/gizm0duck/rack-evil_robot}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Detect robots that are ignoring your robots.txt file and give them the middle finger}
  s.test_files = [
    "spec/rack/evil_robot_spec.rb",
    "spec/spec_helper.rb"
  ]

  # rack is the only dependency. The branches pick whichever declaration API
  # the running RubyGems offers; every branch declares the same requirement.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<rack>, [">= 0"])
    else
      s.add_dependency(%q<rack>, [">= 0"])
    end
  else
    s.add_dependency(%q<rack>, [">= 0"])
  end
end
|
49
|
+
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Behavioural specs for the Rack::EvilRobot middleware: blocked agents are
# redirected, the honey pot is answered directly, everything else reaches the
# wrapped application untouched.
describe Rack::EvilRobot do

  describe "call" do
    attr_reader :app

    before do
      # Minimal downstream application the middleware wraps.
      @app = lambda { |env| [200, {'Content-Type' => 'text/plain'}, ['This is my body']] }
    end

    # Builds a mock request for +path+ sent by +user_agent+ and runs it
    # through the middleware, returning the raw Rack response triple.
    # +options+ is forwarded to the middleware only when given.
    def response_for(path, user_agent, options = nil)
      env = Rack::MockRequest.env_for(path, {'HTTP_USER_AGENT' => user_agent})
      middleware = options ? Rack::EvilRobot.new(app, options) : Rack::EvilRobot.new(app)
      middleware.call(env)
    end

    context "when the user agent is in the evil robots list" do
      describe "when passing in custom redirect_path" do
        it "redirects to the custom path" do
          status, headers, body = response_for('/real_path', 'badBot', {:redirect_path => "http://www.google.com"})

          status.should == 301
          headers["Location"].should == "http://www.google.com"
          body.should == ["No thank you."]
        end
      end

      describe "when no custom options are passed in" do
        it "redirects the bot to example.com" do
          status, headers, body = response_for('/real_path', 'badBot')

          status.should == 301
          headers["Location"].should == "http://www.example.com/"
          body.should == ["No thank you."]
        end
      end
    end

    context "when the user agent is not in the evil robots list" do
      context "when the request hits the honey pot" do
        it "tells the robot it's been busted" do
          status, _headers, body = response_for('/honey_pot/sticky.html', 'Mozilla/5.0')

          status.should == 200
          body.should == ["Mmmm Honey..."]
        end
      end

      context "when the request does not hit the honey pot" do
        it "does nothing" do
          _status, _headers, body = response_for('/real_path', 'Mozilla/5.0')

          body.should == ['This is my body']
        end
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rack-evil_robot
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Shane Wolf
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-08-06 00:00:00 -07:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rack
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Detect robots that are ignoring your robots.txt file and give them the middle finger
|
36
|
+
email: shanewolf@gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.rdoc
|
43
|
+
files:
|
44
|
+
- .gitignore
|
45
|
+
- README.rdoc
|
46
|
+
- Rakefile
|
47
|
+
- VERSION
|
48
|
+
- lib/rack/evil_robot.rb
|
49
|
+
- rake-evil_robot.gemspec
|
50
|
+
- spec/rack/evil_robot_spec.rb
|
51
|
+
- spec/spec_helper.rb
|
52
|
+
has_rdoc: true
|
53
|
+
homepage: http://github.com/gizm0duck/rack-evil_robot
|
54
|
+
licenses: []
|
55
|
+
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options:
|
58
|
+
- --charset=UTF-8
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
requirements: []
|
80
|
+
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 1.3.7
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: Detect robots that are ignoring your robots.txt file and give them the middle finger
|
86
|
+
test_files:
|
87
|
+
- spec/rack/evil_robot_spec.rb
|
88
|
+
- spec/spec_helper.rb
|