spanx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/.gitignore +20 -0
  2. data/.pairs +13 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +13 -0
  7. data/LICENSE +22 -0
  8. data/README.md +175 -0
  9. data/Rakefile +2 -0
  10. data/bin/spanx +7 -0
  11. data/conf/spanx-config.yml.example +44 -0
  12. data/conf/spanx-whitelist.txt.example +2 -0
  13. data/lib/spanx.rb +38 -0
  14. data/lib/spanx/actor/analyzer.rb +94 -0
  15. data/lib/spanx/actor/collector.rb +64 -0
  16. data/lib/spanx/actor/log_reader.rb +46 -0
  17. data/lib/spanx/actor/writer.rb +68 -0
  18. data/lib/spanx/cli.rb +47 -0
  19. data/lib/spanx/cli/analyze.rb +50 -0
  20. data/lib/spanx/cli/disable.rb +36 -0
  21. data/lib/spanx/cli/enable.rb +36 -0
  22. data/lib/spanx/cli/flush.rb +36 -0
  23. data/lib/spanx/cli/watch.rb +91 -0
  24. data/lib/spanx/config.rb +45 -0
  25. data/lib/spanx/helper.rb +8 -0
  26. data/lib/spanx/helper/exit.rb +11 -0
  27. data/lib/spanx/helper/subclassing.rb +31 -0
  28. data/lib/spanx/helper/timing.rb +9 -0
  29. data/lib/spanx/ip_checker.rb +5 -0
  30. data/lib/spanx/logger.rb +47 -0
  31. data/lib/spanx/notifier/audit_log.rb +18 -0
  32. data/lib/spanx/notifier/base.rb +22 -0
  33. data/lib/spanx/notifier/campfire.rb +47 -0
  34. data/lib/spanx/notifier/email.rb +61 -0
  35. data/lib/spanx/runner.rb +74 -0
  36. data/lib/spanx/usage.rb +9 -0
  37. data/lib/spanx/version.rb +3 -0
  38. data/lib/spanx/whitelist.rb +31 -0
  39. data/spanx.gemspec +32 -0
  40. data/spec/fixtures/access.log.1 +104 -0
  41. data/spec/fixtures/access.log.bots +7 -0
  42. data/spec/fixtures/config.yml +10 -0
  43. data/spec/fixtures/config_with_checks.yml +18 -0
  44. data/spec/fixtures/whitelist.txt +4 -0
  45. data/spec/spanx/actor/analyzer_spec.rb +114 -0
  46. data/spec/spanx/actor/collector_spec.rb +4 -0
  47. data/spec/spanx/actor/log_reader_spec.rb +68 -0
  48. data/spec/spanx/actor/writer_spec.rb +63 -0
  49. data/spec/spanx/config_spec.rb +62 -0
  50. data/spec/spanx/helper/timing_spec.rb +22 -0
  51. data/spec/spanx/notifier/base_spec.rb +16 -0
  52. data/spec/spanx/notifier/campfire_spec.rb +5 -0
  53. data/spec/spanx/notifier/email_spec.rb +121 -0
  54. data/spec/spanx/runner_spec.rb +102 -0
  55. data/spec/spanx/whitelist_spec.rb +66 -0
  56. data/spec/spec_helper.rb +25 -0
  57. data/spec/support/fakeredis.rb +1 -0
  58. data/spec/support/mail.rb +10 -0
  59. metadata +302 -0
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .idea
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
19
+
20
+ spanx-config.yml
data/.pairs ADDED
@@ -0,0 +1,13 @@
1
+ pairs:
2
+ ag: Atasay Gokkaya; atasay
3
+ km: Kaan Meralan; kaan
4
+ kg: Konstantin Gredeskoul; kig
5
+ ph: Paul Henry; paul
6
+ sf: Sean Flannagan; sean
7
+ es: Eric Saxby; sax
8
+ tn: Truong Nguyen; constantx
9
+ cc: Cihan Cimen; cihan
10
+ sc: Server Cimen; server
11
+ email:
12
+ prefix: pair
13
+ domain: wanelo.com
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use 1.9.3@spanx --create
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in spanx.gemspec
4
+ gemspec
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ #^syntax detection
3
+
4
+ # A sample Guardfile
5
+ # More info at https://github.com/guard/guard#readme
6
+
7
+ guard 'rspec' do
8
+ watch(%r{^spanx\.gemspec}) { "spec"}
9
+ watch(%r{^spec/.+_spec\.rb$})
10
+ watch(%r{^lib/(.+)\.rb$}) { "spec" }
11
+ watch('spec/spec_helper.rb') { "spec" }
12
+ end
13
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Konstantin Gredeskoul
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,175 @@
1
+ # Spanx
2
+
3
+ Spank down IP spam: IP-based rate limiting for web applications behind an HTTP server such as nginx or Apache.
4
+
5
+ Spanx is a simple Redis-based web request rate limiter, which integrates into any web application simply by monitoring
6
+ one or more HTTP server access log file(s) in real time (think Apache/nginx access.log).
7
+
8
+ Basic flow is as follows:
9
+
10
+ * Spanx tails the access.log file(s)
11
+ * parses out IP addresses of each request
12
+ * maintains a tally of request counts per IP and per time slice.
13
+ * Spanx is then able to detect when one or more IPs exceed the rate limiting configuration thresholds provided
14
+ (multiple thresholds are supported).
15
+ * When such an IP is detected, Spanx immediately writes it out into a block-list file (suitable for consumption by nginx or
16
+ apache, in a format such as "deny 127.0.0.1;"), and then
17
+ * executes a pre-configured command, presumed to reload HTTP server configuration (such as HUP nginx, etc) and activate new blocking rules.
18
+
19
+ Spanx additionally supports a regular-expression-based whitelist file that can be used to exclude certain log lines
20
+ from consideration (for example, Googlebot requests matched by User-Agent).
21
+
22
+ ### Design
23
+
24
+ Spanx can be integrated into part of your application, or can run as a standalone ruby app. Spanx requires ruby
25
+ 1.9.3, and it uses ruby threads to work on a few things in parallel.
26
+
27
+ Spanx has two main components:
28
+
29
+ 1. *watcher* is a process that monitors HTTP server log files, and updates Redis periodically with most recent counts.
30
+ Watcher also writes out the blocked IP file, if blocked IPs are found in Redis database.
31
+
32
+ 2. *analyzer* is a process that reads up to date information on IP addresses from Redis, and analyzes it. If any rate
33
+ limit-exceeding IPs are found, it writes them to the Redis DB, with an expiration TTL set.
34
+
35
+ If you have only one web server, you can run both watcher and analyzer as a single ruby process.
36
+
37
+ If you have multiple web servers, you need to run watcher on each server, and analyzer only once (somewhere).
38
+
39
+ ### Alerts
40
+
41
+ Besides actually writing out IPs to a block list file, Spanx supports notifiers that will be called when a new IP
42
+ is blocked. Currently supported are audit log notifier (that writes that information to a log file), a Campfire
43
+ Chat notifier, which will print IP blocking information into your Campfire chat room, and an Email notifier. It is
44
+ very easy to write additional notifiers.
45
+
46
+ ## Installation
47
+
48
+ Add this line to your application's Gemfile:
49
+
50
+ gem 'spanx'
51
+
52
+ And then execute:
53
+
54
+ $ bundle
55
+
56
+ Or install it yourself as:
57
+
58
+ $ gem install spanx
59
+
60
+ ### Dependencies
61
+
62
+ Spanx uses the Pause gem to persist state. This depends on Redis to save state and do set logic on the information it finds.
63
+
64
+ ## Usage
65
+
66
+ Spanx has a single executable with several sub-commands. In practice, multiple commands will
67
+ be run concurrently to do all of the necessary calculations.
68
+
69
+ Configuration can be provided via a YAML file (see example), and/or via command line options. Not
70
+ all configuration can be set via command line. If an option is provided in both YAML file and command line,
71
+ then the latter (the command line value) is chosen.
72
+
73
+
74
+ ### watch
75
+
76
+ This command watches an HTTP server log file and writes out blocked IPs to a file specified.
77
+
78
+ ```bash
79
+ Usage: [bundle exec] spanx watch [options]
80
+ -f, --file ACCESS_LOG Apache/nginx access log file to scan continuously
81
+ -z, --analyze Analyze IPs also (as opposed to running `spanx analyze` in another process)
82
+ -b, --block_file BLOCK_FILE Output file to store NGINX block list
83
+ -c, --config CONFIG Path to config file (YML) (required)
84
+ -d, --daemonize Detach from TTY and run as a daemon
85
+ -g, --debug Log to STDOUT status of execution and some time metrics
86
+ -r, --run <shell command> Shell command to run anytime blocked ip file changes, for example "sudo pkill -HUP nginx"
87
+ -w, --whitelist WHITELIST File with newline separated reg exps, to exclude lines from access log
88
+ -h, --help Show this message
89
+ ```
90
+
91
+ ### analyze
92
+
93
+ Analyzes IPs found by the `watch` command. If an IP exceeds its maximum count for a time
94
+ period check (as set in the config file), the IP is written into Redis with a TTL defined by the
95
+ period check.
96
+
97
+ ```bash
98
+ Usage: [bundle exec] spanx analyze [options]
99
+ -a, --audit AUDIT_FILE Historical record of IP blocking decisions
100
+ -c, --config CONFIG Path to config file (YML) (required)
101
+ -d, --daemonize
102
+ -g, --debug Log status to STDOUT
103
+ -h, --help Show this message
104
+ ```
105
+
106
+ ### disable
107
+
108
+ Disables IP blocking. Note that this only affects the actual writing out
109
+ of block files, not of IP tracking or analysis. Note that this requires
110
+ a connection to redis, and thus requires the same config file used in
111
+ `analyze` and `watch`.
112
+
113
+ ```bash
114
+ Usage: [bundle exec] spanx disable [options]
115
+ -c, --config CONFIG Path to config file (YML) (required)
116
+ -g, --debug Log status to STDOUT
117
+ -h, --help Show this message
118
+ ```
119
+
120
+ ### enable
121
+
122
+ Reenables IP blocking if disabled. As with `disable`, the config file is
123
+ required to connect to redis.
124
+
125
+ ```bash
126
+ Usage: [bundle exec] spanx enable [options]
127
+ -c, --config CONFIG Path to config file (YML) (required)
128
+ -g, --debug Log status to STDOUT
129
+ -h, --help Show this message
130
+ ```
131
+
132
+ ### flush
133
+
134
+ This removes the persistence data around current IP blocks. Use this
135
+ when you want to remove all data around current blocks without (or in
136
+ addition to) disabling the blocker.
137
+
138
+ ```bash
139
+ Usage: [bundle exec] spanx flush [options]
140
+ -c, --config CONFIG Path to config file (YML) (required)
141
+ -g, --debug Log status to STDOUT
142
+ -h, --help Show this message
143
+ ```
144
+
145
+ ## Examples
146
+
147
+ If you have only one load balancer, you may want to centralize all work into a single process, as such:
148
+
149
+ ```bash
150
+ $ spanx watch -w /path/to/whitelist -c /path/to/spanx.conf.yml -z -d
151
+ ```
152
+
153
+ With multiple load balancers, this may not be desirable. All hosts will need to process their own access
154
+ log, but a minimum number of hosts should analyze the IP traffic.
155
+
156
+ ```bash
157
+ lb1 $ spanx watch -c spanx.conf.yml -r "sudo pkill -HUP nginx" --debug >> /var/log/spanx.watch.log 2>&1 &
158
+ lb2 $ spanx watch -c spanx.conf.yml -r "sudo pkill -HUP nginx" --debug >> /var/log/spanx.watch.log 2>&1 &
159
+
160
+ lb2 $ spanx analyze -c spanx.conf.yml -a spanx.audit.log --debug >> /var/log/spanx.analyze.log 2>&1 &
161
+ ```
162
+
163
+ ## Contributing
164
+
165
+ 1. Fork it
166
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
167
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
168
+ 4. Push to the branch (`git push origin my-new-feature`)
169
+ 5. Create new Pull Request
170
+
171
+ ## Maintainers
172
+
173
+ Konstantin Gredeskoul (@kigster) and Eric Saxby (@sax) at Wanelo, Inc (http://github.com/wanelo)
174
+
175
+ (c) 2012, All rights reserved.
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
3
+ require 'rubygems'
4
+ require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE'])
5
+ require 'spanx'
6
+
7
+ Spanx::CLI.new.run
@@ -0,0 +1,44 @@
1
+ ---
2
+ :access_log: ""
3
+ :block_file: "block-ips.conf"
4
+ :audit_file: "spanx-audit.log"
5
+ :redis:
6
+ :host: "127.0.0.1"
7
+ :port: 6379
8
+ :db: 1
9
+ :collector:
10
+ :resolution: 300 # seconds
11
+ :history: 21600 # seconds
12
+ :flush_interval: 5
13
+ :log_reader:
14
+ :tail_interval: 1
15
+ :analyzer:
16
+ :analyze_interval: 20
17
+ :blocked_ip_notifiers:
18
+ - "Spanx::Notifier::AuditLog"
19
+ - "Spanx::Notifier::Campfire"
20
+ - "Spanx::Notifier::Email"
21
+ :period_checks:
22
+ - :period_seconds: 3600
23
+ :max_allowed: 2000
24
+ :block_ttl: 7200
25
+ - :period_seconds: 600
26
+ :max_allowed: 600
27
+ :block_ttl: 1200
28
+ - :period_seconds: 21600
29
+ :max_allowed: 8000
30
+ :block_ttl: 64800
31
+ :writer:
32
+ :write_interval: 10
33
+ :campfire:
34
+ :enabled: true
35
+ :room_id: 1111
36
+ :token: aaffdfsdfadfasdfasdfasdf
37
+ :account: test
38
+ :email:
39
+ :enabled: true
40
+ :to: "everyone@mycompany.com"
41
+ :from: "spanx@mycompany.com"
42
+ :password: "s3cVr3p4ssw0rd"
43
+ :domain: "mycompany.com"
44
+ :gateway: "smtp.gmail.com"
@@ -0,0 +1,2 @@
1
+ ^(8.8.8.[8-9])
2
+ msnbot|Googlebot|bingbot|Yahoo. Slurp|facebookexternalhit
@@ -0,0 +1,38 @@
1
+ require 'redis'
2
+ require 'pause'
3
+ require 'spanx/version'
4
+ require 'spanx/helper'
5
+ require 'spanx/logger'
6
+ require 'spanx/config'
7
+ require 'spanx/usage'
8
+
9
+ require 'spanx/ip_checker'
10
+
11
+ require 'spanx/cli'
12
+ require 'spanx/notifier/base'
13
+ require 'spanx/notifier/campfire'
14
+ require 'spanx/notifier/audit_log'
15
+ require 'spanx/notifier/email'
16
+
17
+ require 'spanx/actor/log_reader'
18
+ require 'spanx/actor/collector'
19
+ require 'spanx/actor/analyzer'
20
+ require 'spanx/actor/writer'
21
+ require 'spanx/whitelist'
22
+
23
+ require 'spanx/runner'
24
+
25
+ module Spanx
26
+ end
27
+
28
+ class String
29
+ def constantize
30
+ camel_cased_word = self
31
+ unless /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/ =~ camel_cased_word
32
+ raise NameError, "#{camel_cased_word.inspect} is not a valid constant name!"
33
+ end
34
+
35
+ Object.module_eval("::#{$1}", __FILE__, __LINE__)
36
+ end
37
+ end
38
+
@@ -0,0 +1,94 @@
1
+ require 'spanx/logger'
2
+ require 'spanx/helper/timing'
3
+ require 'spanx/notifier/base'
4
+ require 'spanx/notifier/campfire'
5
+ require 'spanx/notifier/audit_log'
6
+ require 'spanx/notifier/email'
7
+
8
+ module Spanx
9
+ module Actor
10
+ class Analyzer
11
+ include Spanx::Helper::Timing
12
+
13
+ attr_accessor :config, :notifiers, :blocked_ips
14
+
15
+ def initialize config
16
+ @config = config
17
+ @audit_file = config[:audit_file]
18
+ @notifiers = []
19
+ initialize_notifiers(config) if config[:analyzer][:blocked_ip_notifiers]
20
+
21
+ @blocked_ips = []
22
+ @previously_blocked_ips = []
23
+ end
24
+
25
+ def run
26
+ Thread.new do
27
+ Thread.current[:name] = "analyzer"
28
+ Logger.log "starting analyzer loop..."
29
+ loop do
30
+ analyze_all_ips()
31
+ sleep config[:analyzer][:analyze_interval]
32
+ end
33
+ end
34
+ end
35
+
36
+ # Look through every IP on the stack. IPs that fulfill a PeriodCheck
37
+ # are pushed onto a redis block list.
38
+ def analyze_all_ips
39
+ return unless Spanx::IPChecker.enabled?
40
+
41
+ @previously_blocked_ips = Spanx::IPChecker.blocked_identifiers
42
+
43
+ ips = Spanx::IPChecker.tracked_identifiers
44
+
45
+ Logger.logging "analyzed #{ips.size} IPs" do
46
+ ips.each do |ip|
47
+ blocked_ip = analyze_ip(ip)
48
+ blocked_ips << blocked_ip if blocked_ip
49
+ end
50
+ end
51
+
52
+ Logger.log "blocking [#{blocked_ips.size}] ips" unless blocked_ips.empty?
53
+ call_notifiers(blocked_ips)
54
+ blocked_ips.clear
55
+ end
56
+
57
+ # Analyze individual IP for all defined periods. As soon as one
58
+ # rule is triggered, exit the method
59
+ def analyze_ip(ip)
60
+ Spanx::IPChecker.new(ip).analyze
61
+ end
62
+
63
+ private
64
+
65
+ def initialize_notifiers(config)
66
+ notifiers_to_initialize = config[:analyzer][:blocked_ip_notifiers]
67
+ notifiers_to_initialize.each do |class_name|
68
+ Logger.logging "instantiating notifier #{class_name}" do
69
+ begin
70
+ notifier = class_name.constantize.new(config)
71
+ self.notifiers << notifier
72
+ rescue => e
73
+ Logger.log "error instantiating #{class_name}: #{e.inspect}, notifier disabled."
74
+ end
75
+ end
76
+ end
77
+ end
78
+
79
+ def call_notifiers(blocked_ips)
80
+ unless notifiers.empty?
81
+ blocked_ips.reject { |b| @previously_blocked_ips.include?(b.identifier) }.each do |blocked_ip|
82
+ self.notifiers.each do |notifier|
83
+ begin
84
+ notifier.publish(blocked_ip)
85
+ rescue => e
86
+ Logger.log "error notifying #{notifier.inspect} about blocked IP #{blocked_ip}: #{e.inspect}"
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end