ghazel-httpbl 0.1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/LICENSE +21 -0
- data/Manifest +7 -0
- data/README +168 -0
- data/Rakefile +7 -0
- data/ghazel-httpbl.gemspec +33 -0
- data/httpbl.gemspec +22 -0
- data/lib/httpbl.rb +90 -0
- metadata +79 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2009 Brandon Palmen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/Manifest
ADDED
data/README
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
HttpBL
|
|
2
|
+
===========
|
|
3
|
+
|
|
4
|
+
HttpBL is drop-in IP-filtering middleware for Rails 2.3+ and other Rack-based
|
|
5
|
+
applications. It resolves information about each request's source IP address
|
|
6
|
+
from the Http:BL service at http://projecthoneypot.org, and denies access to
|
|
7
|
+
clients whose IP addresses are associated with suspicious behavior like impolite
|
|
8
|
+
crawling, comment-spamming, dictionary attacks, and email-harvesting.
|
|
9
|
+
|
|
10
|
+
* Deny access to IP addresses that are associated with suspicious
|
|
11
|
+
behavior which exceeds a customizable threshold.
|
|
12
|
+
* Expire blocked IPs that have not been associated with suspicious
|
|
13
|
+
behavior after a customizable period of days.
|
|
14
|
+
* Identify common search engines by IP address (not User-Agent), and
|
|
15
|
+
disallow access to a specific subset.
|
|
16
|
+
* Optionally use memcached to avoid repeated look-ups per client-session
|
|
17
|
+
|
|
18
|
+
Installation
|
|
19
|
+
------------
|
|
20
|
+
|
|
21
|
+
gem install httpbl
|
|
22
|
+
|
|
23
|
+
Basic Usage
|
|
24
|
+
------------
|
|
25
|
+
|
|
26
|
+
HttpBL is Rack middleware, and can be used with any Rack-based application. First,
|
|
27
|
+
you must obtain an API key for the Http:BL service at http://projecthoneypot.org
|
|
28
|
+
|
|
29
|
+
To add HttpBL to your middleware stack, simply add the following to config.ru:
|
|
30
|
+
|
|
31
|
+
require 'httpbl'
|
|
32
|
+
|
|
33
|
+
use HttpBL, :api_key => "YOUR API KEY"
|
|
34
|
+
|
|
35
|
+
For Rails 2.3+ add the following to environment.rb:
|
|
36
|
+
|
|
37
|
+
require 'httpbl'
|
|
38
|
+
|
|
39
|
+
config.middleware.use HttpBL, :api_key => "YOUR API KEY"
|
|
40
|
+
|
|
41
|
+
Advanced Usage
|
|
42
|
+
-------------
|
|
43
|
+
|
|
44
|
+
To insert HttpBL at the top of the Rails Rack stack:
|
|
45
|
+
(use 'rake middleware' to confirm that Rack::Lock is at the top of the stack)
|
|
46
|
+
|
|
47
|
+
config.middleware.insert_before(Rack::Lock, HttpBL, :api_key => "YOUR API KEY")
|
|
48
|
+
|
|
49
|
+
To customize HttpBL's filtering behavior, use the available options:
|
|
50
|
+
|
|
51
|
+
use HttpBL, :api_key => "YOUR API KEY",
|
|
52
|
+
:deny_types => [1, 2, 4],
|
|
53
|
+
:threat_level_threshold => 0,
|
|
54
|
+
:age_threshold => 5,
|
|
55
|
+
:blocked_search_engines => [0],
|
|
56
|
+
:memcached_server => "127.0.0.1:11211",
|
|
57
|
+
:memcached_options => {see: memcache-client documentation}
|
|
58
|
+
|
|
59
|
+
Available Options:
|
|
60
|
+
|
|
61
|
+
The following options (shown with default values) are available to
|
|
62
|
+
customize the behavior of the httpbl middleware filter:
|
|
63
|
+
|
|
64
|
+
:deny_types => [1, 2, 4, 8, 16, 32, 64, 128]
|
|
65
|
+
|
|
66
|
+
Project Honeypot classifies suspicious behavior as belonging to
|
|
67
|
+
certain types, which are identified in the API's response to
|
|
68
|
+
each IP lookup. You can tell HttpBL to only deny certain kinds
|
|
69
|
+
of behavior by changing this to a subset of those possible.
|
|
70
|
+
|
|
71
|
+
As of March 2009, only types 1, 2, and 4 have been specified,
|
|
72
|
+
but additional types are reserved for the future and HttpBL checks
|
|
73
|
+
against all of the anticipated type codes by default. Thus,
|
|
74
|
+
there may be a very small performance advantage to setting
|
|
75
|
+
:deny_types => [1, 2, 4] simply to exclude checks for codes
|
|
76
|
+
that aren't (yet) being used; however, this will have to be
|
|
77
|
+
updated if more codes come into use, whereas the default
|
|
78
|
+
requires no further attention.
|
|
79
|
+
|
|
80
|
+
The current types are:
|
|
81
|
+
1: Suspicious
|
|
82
|
+
2: Harvester
|
|
83
|
+
4: Comment Spammer
|
|
84
|
+
|
|
85
|
+
:threat_level_threshold => 2
|
|
86
|
+
|
|
87
|
+
The threat level reported by Project Honeypot is based on a
|
|
88
|
+
logarithmic scale, approximated by:
|
|
89
|
+
1: 1 spam
|
|
90
|
+
25: 100 spam
|
|
91
|
+
50: 10,000 spam
|
|
92
|
+
100: 1,000,000 spam.
|
|
93
|
+
in which spam is pronounced spam even in the plural.
|
|
94
|
+
|
|
95
|
+
Choosing a threat level threshold can be tricky business if
|
|
96
|
+
one isn't sure how accurate the measure of threat is, since it
|
|
97
|
+
would be improper to block legitimate traffic by mistake. Because
|
|
98
|
+
the email addresses that Project Honeypot uses as spam-bait are unique,
|
|
99
|
+
artificial, and well-hidden, NO email should be sent to those addresses
|
|
100
|
+
at all, and it is fair to assume that even the low threat level
|
|
101
|
+
associated with just a few spam is still significant.
|
|
102
|
+
|
|
103
|
+
With that in mind, the default threshold is 2; if you want to
|
|
104
|
+
filter more aggressively, set :threat_level_threshold => 0
|
|
105
|
+
|
|
106
|
+
:age_threshold => 10
|
|
107
|
+
|
|
108
|
+
This sets the number of days that IP addresses that have been
|
|
109
|
+
associated with suspicious activity must wait to regain access after
|
|
110
|
+
the suspicious activity has ceased. Keeping this at a sane value will
|
|
111
|
+
allow IPs that are reassigned or cleaned up to expire from the blacklist.
|
|
112
|
+
|
|
113
|
+
If you want to be more aggressive (require a longer cool-off-period),
|
|
114
|
+
set :age_threshold => 30; if you want to let IPs back in after just a
|
|
115
|
+
few days, set :age_threshold => 5
|
|
116
|
+
|
|
117
|
+
:blocked_search_engines => []
|
|
118
|
+
|
|
119
|
+
Because Project Honeypot identifies search engine traffic by IP
|
|
120
|
+
address, this filter may be used to exclude certain robots from your
|
|
121
|
+
site. If one presumes that request-IPs are at least marginally more
|
|
122
|
+
difficult to spoof than User-Agent strings, this filter may be marginally
|
|
123
|
+
more effective than some other robot detection systems.
|
|
124
|
+
|
|
125
|
+
If there are particular search engines that you would like to exclude
|
|
126
|
+
from your site, set :blocked_search_engines => [0, ... ] where the codes
|
|
127
|
+
defined by http://projecthoneypot.org/httpbl_api.php are:
|
|
128
|
+
|
|
129
|
+
0: Undocumented
|
|
130
|
+
1: AltaVista
|
|
131
|
+
2: Ask
|
|
132
|
+
3: Baidu
|
|
133
|
+
4: Excite
|
|
134
|
+
5: Google
|
|
135
|
+
6: Looksmart
|
|
136
|
+
7: Lycos
|
|
137
|
+
8: MSN
|
|
138
|
+
9: Yahoo
|
|
139
|
+
10: Cuil
|
|
140
|
+
11: InfoSeek
|
|
141
|
+
12: Miscellaneous
|
|
142
|
+
|
|
143
|
+
:memcached_server => nil
|
|
144
|
+
:memcached_options => {}
|
|
145
|
+
|
|
146
|
+
When using httpbl in a production environment, it is *strongly* recommended
|
|
147
|
+
that you configure httpbl to use memcached to temporarily store the blacklist
|
|
148
|
+
status of client ip addresses. This greatly enhances the efficiency of the
|
|
149
|
+
filter because it need only look up each client ip address once per session,
|
|
150
|
+
instead of once per request. It also reduces the potential burden of a
|
|
151
|
+
popular web application that uses httpbl on project honeypot's api services.
|
|
152
|
+
|
|
153
|
+
Simply set :memcached_server and :memcached_options according to the
|
|
154
|
+
conventions of the memcache-client ruby library; for example:
|
|
155
|
+
:memcached_server => '127.0.0.1:11211', :memcached_options => {:namespace => 'my_app'}
|
|
156
|
+
|
|
157
|
+
memcache-client is included in rails by default, but if you're using rack
|
|
158
|
+
without rails, you will need to install and require the memcache-client gem.
|
|
159
|
+
|
|
160
|
+
:dns_timeout => 0.5
|
|
161
|
+
|
|
162
|
+
DNS requests to the Http:BL service shouldn't take this long, but if
|
|
163
|
+
they do, you can modify this setting to prevent the request from
|
|
164
|
+
hanging until a system default timeout. Of course, setting this timeout
|
|
165
|
+
too low will essentially disable the filter (but 0 is a bad idea), if responses
|
|
166
|
+
can't be returned from the API before the request is permitted.
|
|
167
|
+
Best not to mess with it unless you know what you're doing - it's a safety
|
|
168
|
+
mechanism.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Gem specification for the ghazel-httpbl package: a Rack middleware IP filter
# backed by Project Honeypot's Http:BL service. Its only dependency is rack.
Gem::Specification.new do |s|
  s.name = %q{ghazel-httpbl}
  s.version = "0.1.6.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Brandon Palmen"]
  s.date = %q{2010-10-20}
  s.description = %q{A Rack middleware IP filter that uses Http:BL to exclude suspicious robots.}
  s.email = %q{}
  s.extra_rdoc_files = ["README", "lib/httpbl.rb", "CHANGELOG", "LICENSE"]
  s.files = ["README", "lib/httpbl.rb", "Rakefile", "httpbl.gemspec", "CHANGELOG", "LICENSE", "Manifest", "ghazel-httpbl.gemspec"]
  s.homepage = %q{http://github.com/bpalmen/httpbl}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Ghazel-httpbl", "--main", "README"]
  s.require_paths = ["lib"]
  # Guarded like required_rubygems_version= above, so the spec still loads on
  # a RubyGems that does not provide this setter.
  s.rubyforge_project = %q{ghazel-httpbl} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{A Rack middleware IP filter that uses Http:BL to exclude suspicious robots.}

  s.specification_version = 3 if s.respond_to? :specification_version

  # Declare rack as a runtime dependency when the running RubyGems can express
  # that distinction; otherwise fall back to a plain dependency. Checking for
  # the method itself replaces the former version-constant comparison.
  if s.respond_to? :add_runtime_dependency
    s.add_runtime_dependency(%q<rack>, [">= 0"])
  else
    s.add_dependency(%q<rack>, [">= 0"])
  end
end
|
data/httpbl.gemspec
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Gem specification for the upstream httpbl package: a Rack middleware filter
# that blocks requests from suspicious IP addresses.
Gem::Specification.new do |s|
  s.name = 'httpbl'
  s.version = '0.1.6.1'
  s.date = '2009-05-28'
  s.homepage = "http://bpalmen.github.com/httpbl/"
  s.authors = ["Brandon Palmen"]
  s.email = "brandon.palmen@gmail.com"
  # Only set when the running RubyGems provides the setter, so the spec still
  # loads where it is unavailable.
  s.rubyforge_project = 'httpbl' if s.respond_to? :rubyforge_project=
  s.summary = "HttpBL is a Rack middleware filter that blocks requests from suspicious IP addresses."
  s.description = "HttpBL is a Rack middleware filter that blocks requests from suspicious IP addresses."

  s.files = %w[
    README
    CHANGELOG
    LICENSE
    lib/httpbl.rb
  ]

  s.add_dependency 'rack', '>= 0.9.0'
  s.extra_rdoc_files = %w[README]
  s.require_paths = %w[lib]
end
|
data/lib/httpbl.rb
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# The Httpbl middleware
|
|
2
|
+
|
|
3
|
+
# Rack middleware that looks up each request's source IP address in the
# Http:BL DNS blacklist (dnsbl.httpbl.org) and answers with 403 Forbidden when
# the listing exceeds the configured thresholds; every other request is passed
# on to the wrapped application.
#
# Options (defaults are set in #initialize):
#   :api_key                - required Http:BL access key
#   :deny_types             - visitor-type bit flags that may be denied
#   :threat_level_threshold - a listing is denied only above this threat score
#   :age_threshold          - a listing is denied only if younger than this (days)
#   :blocked_search_engines - search engine codes to deny
#   :dns_timeout            - seconds to wait for the DNS lookup
#   :memcached_server / :memcached_options - optional lookup cache
class HttpBL
  autoload :Resolv, 'resolv'

  # Lifetime of a cached lookup result, in seconds (one hour). A plain integer
  # is used so caching also works without ActiveSupport's Integer#hour.
  CACHE_TTL = 3600

  # Http:BL queries are built from reversed dotted-quad octets, so only
  # addresses of this shape can be looked up.
  IPV4_PATTERN = /\A\d{1,3}(?:\.\d{1,3}){3}\z/

  # Selects the module whose +timeout+ method guards DNS lookups: SystemTimer
  # on Ruby 1.8 when that gem can be loaded, otherwise the stdlib Timeout.
  def self.encourage_safe_timeouts
    if /^1\.8/ =~ RUBY_VERSION
      begin
        require 'system_timer'
        @@DnsTimeout = SystemTimer
      rescue LoadError
        require 'timeout'
        @@DnsTimeout = Timeout
      end
    else
      require 'timeout'
      @@DnsTimeout = Timeout
    end
  end

  encourage_safe_timeouts

  # app     - the next Rack application in the stack.
  # options - Hash overriding the defaults below; :api_key is mandatory.
  #
  # Raises RuntimeError when :api_key is missing.
  def initialize(app, options = {})
    @app = app
    @options = {:blocked_search_engines => [],
                :age_threshold => 10,
                :threat_level_threshold => 2,
                :deny_types => [1, 2, 4, 8, 16, 32, 64, 128], # 8..128 aren't used as of 10/2009, but might be used in the future
                :dns_timeout => 0.5,
                :memcached_server => nil,
                :memcached_options => {}
    }.merge(options)
    raise "Missing :api_key for Http:BL middleware" unless @options[:api_key]
    if @options[:memcached_server]
      # Loaded lazily so memcache-client is only needed when caching is enabled.
      require 'memcache'
      @cache = MemCache.new(@options[:memcached_server], @options[:memcached_options])
    end
  end

  # Rack entry point. Each request is handled by a duplicate of the middleware.
  def call(env)
    dup._call(env)
  end

  # Looks up the request IP and returns either a 403 Rack response or the
  # wrapped application's response.
  def _call(env)
    request = Rack::Request.new(env)
    bl_status = check(request.ip)
    if bl_status && blocked?(bl_status)
      # The body is wrapped in an Array because a Rack body must respond to
      # #each, which String does not on Ruby 1.9+. #check only yields a status
      # for dotted-quad addresses, so the interpolated IP is digits and dots.
      [403, {"Content-Type" => "text/html"}, ["<h1>403 Forbidden</h1> Request IP is listed as suspicious by <a href='http://projecthoneypot.org/ip_#{request.ip}'>Project Honeypot</a>"]]
    else
      @app.call(env)
    end
  end

  # Returns the Http:BL answer for +ip+ (a dotted-quad String such as
  # "127.1.50.4"), or a falsy value when the address is not listed, is not an
  # IPv4 address, or the lookup failed. Uses the cache when one is configured.
  def check(ip)
    return nil unless ip.to_s =~ IPV4_PATTERN
    @cache ? cache_check(ip) : resolve(ip)
  end

  # Cached variant of #resolve. A miss is resolved and stored for CACHE_TTL
  # seconds; unlisted addresses are stored as "0.0.0.0", which #blocked? never
  # treats as a listing.
  def cache_check(ip)
    cache = @cache.clone
    key = "httpbl_#{ip}"
    response = cache.get(key)
    unless response
      response = resolve(ip)
      cache.set(key, (response || "0.0.0.0"), CACHE_TTL)
    end
    response
  end

  # Performs the DNS query "<api_key>.<reversed ip>.dnsbl.httpbl.org".
  #
  # Returns the answer address as a String, false when the lookup raised
  # (for example because the IP is not listed), or nil on timeout or refused
  # connection.
  def resolve(ip)
    query = "#{@options[:api_key]}.#{ip.split('.').reverse.join('.')}.dnsbl.httpbl.org"
    @@DnsTimeout::timeout(@options[:dns_timeout]) do
      begin
        # The block form closes the resolver as soon as the lookup finishes.
        Resolv::DNS.open { |dns| dns.getaddress(query).to_s }
      rescue StandardError
        # Best effort: any lookup failure is treated as "not listed".
        false
      end
    end
  rescue Timeout::Error, Errno::ECONNREFUSED
    nil
  end

  # Decides whether an Http:BL answer warrants denying the request.
  #
  # response - dotted-quad String "127.<age in days>.<threat>.<type>".
  #
  # A type of 0 marks a search engine, whose code is carried in the third
  # octet. Any other type is denied only when it matches one of :deny_types
  # AND its threat score is above :threat_level_threshold AND its age is below
  # :age_threshold.
  def blocked?(response)
    marker, age, threat, type = response.split('.').collect(&:to_i)
    # Only answers in 127.0.0.0/8 are valid Http:BL results.
    return false unless marker == 127

    if type == 0
      @options[:blocked_search_engines].include?(threat)
    else
      denied_type = @options[:deny_types].any? { |flag| type & flag == flag }
      # && (not the low-precedence `and`) so that all three conditions decide
      # the result.
      denied_type && threat > @options[:threat_level_threshold] && age < @options[:age_threshold]
    end
  end

  private

end
|
metadata
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: ghazel-httpbl
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.6.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Brandon Palmen
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
|
|
12
|
+
date: 2010-10-20 00:00:00 -07:00
|
|
13
|
+
default_executable:
|
|
14
|
+
dependencies:
|
|
15
|
+
- !ruby/object:Gem::Dependency
|
|
16
|
+
name: rack
|
|
17
|
+
type: :runtime
|
|
18
|
+
version_requirement:
|
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
20
|
+
requirements:
|
|
21
|
+
- - ">="
|
|
22
|
+
- !ruby/object:Gem::Version
|
|
23
|
+
version: "0"
|
|
24
|
+
version:
|
|
25
|
+
description: A Rack middleware IP filter that uses Http:BL to exclude suspicious robots.
|
|
26
|
+
email: ""
|
|
27
|
+
executables: []
|
|
28
|
+
|
|
29
|
+
extensions: []
|
|
30
|
+
|
|
31
|
+
extra_rdoc_files:
|
|
32
|
+
- README
|
|
33
|
+
- lib/httpbl.rb
|
|
34
|
+
- CHANGELOG
|
|
35
|
+
- LICENSE
|
|
36
|
+
files:
|
|
37
|
+
- README
|
|
38
|
+
- lib/httpbl.rb
|
|
39
|
+
- Rakefile
|
|
40
|
+
- httpbl.gemspec
|
|
41
|
+
- CHANGELOG
|
|
42
|
+
- LICENSE
|
|
43
|
+
- Manifest
|
|
44
|
+
- ghazel-httpbl.gemspec
|
|
45
|
+
has_rdoc: true
|
|
46
|
+
homepage: http://github.com/bpalmen/httpbl
|
|
47
|
+
licenses: []
|
|
48
|
+
|
|
49
|
+
post_install_message:
|
|
50
|
+
rdoc_options:
|
|
51
|
+
- --line-numbers
|
|
52
|
+
- --inline-source
|
|
53
|
+
- --title
|
|
54
|
+
- Ghazel-httpbl
|
|
55
|
+
- --main
|
|
56
|
+
- README
|
|
57
|
+
require_paths:
|
|
58
|
+
- lib
|
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
60
|
+
requirements:
|
|
61
|
+
- - ">="
|
|
62
|
+
- !ruby/object:Gem::Version
|
|
63
|
+
version: "0"
|
|
64
|
+
version:
|
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - ">="
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: "1.2"
|
|
70
|
+
version:
|
|
71
|
+
requirements: []
|
|
72
|
+
|
|
73
|
+
rubyforge_project: ghazel-httpbl
|
|
74
|
+
rubygems_version: 1.3.5
|
|
75
|
+
signing_key:
|
|
76
|
+
specification_version: 3
|
|
77
|
+
summary: A Rack middleware IP filter that uses Http:BL to exclude suspicious robots.
|
|
78
|
+
test_files: []
|
|
79
|
+
|