ntail 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
source "http://rubygems.org"

# Runtime dependencies.
# lib/ntail/log_line.rb requires both of these when the gem is loaded, so they
# have to be installed for users of the gem and not only for its developers.
gem "rainbow", ">= 0"
gem "user-agent", ">= 0"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.5.1"
  gem "rcov", ">= 0"
end
@@ -0,0 +1,24 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ git (1.2.5)
5
+ jeweler (1.5.1)
6
+ bundler (~> 1.0.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ rainbow (1.1)
10
+ rake (0.8.7)
11
+ rcov (0.9.9)
12
+ shoulda (2.11.3)
13
+ user-agent (1.0.0)
14
+
15
+ PLATFORMS
16
+ ruby
17
+
18
+ DEPENDENCIES
19
+ bundler (~> 1.0.0)
20
+ jeweler (~> 1.5.1)
21
+ rainbow
22
+ rcov
23
+ shoulda
24
+ user-agent
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Peter Vandenberk
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,90 @@
1
+ = ntail
2
+
3
+ A parsing tail(1)-like utility for nginx log files
4
+
5
+ > gem install ntail
6
+
7
+ == Examples
8
+
9
+ * read from STDIN and print each line to STDOUT <em>(stop with ^D)</em>
10
+
11
+ > ntail
12
+
13
+ * process an nginx log file and print each line to STDOUT
14
+
15
+ > ntail /var/log/nginx/access.log
16
+
17
+ * tail an "active" nginx log file and print each new line to STDOUT <em>(stop with ^C)</em>
18
+
19
+ > tail -f /var/log/nginx/access.log | ntail
20
+
21
+ * tail STDIN and print out the length of each line <em>(to illustrate -e option)</em>
22
+
23
+ > ntail -e '{ |line| puts line.size }'
24
+
25
+ * tail STDIN but only print out non-empty lines <em>(to illustrate -f option)</em>
26
+
27
+ > ntail -f '{ |line| line.size > 0 }'
28
+
29
+ * the following invocations behave exactly the same <em>(to illustrate -e and -f options)</em>
30
+
31
+ > ntail
32
+ > ntail -f '{ |line| true }' -e '{ |line| puts line }'
33
+
34
+ * find all HTTP requests that resulted in a '5xx' HTTP error/status code <em>(e.g. Rails 500 errors)</em>
35
+
36
+ > gunzip -S .gz -c access.log-20101216.gz | ntail -f '{ |line| line.server_error_status? }'
37
+
38
+ * generate a summary report of HTTP status codes, for all non-200 HTTP requests
39
+
40
+ > ntail -f '{ |line| line.status != "200" }' -e '{ |line| puts line.status }' access.log | sort | uniq -c
41
+ 76 301
42
+ 16 302
43
+ 2 304
44
+ 1 406
45
+
46
+ * print out GeoIP country and city information for each HTTP request <em>(depends on the optional <tt>geoip</tt> gem)</em>
47
+
48
+ > ntail -e '{ |line| puts [line.to_country, line.to_city].join("\t") }' /var/log/nginx/access.log
49
+ United States Los Angeles
50
+ United States Houston
51
+ Germany Berlin
52
+ United Kingdom London
53
+
54
+ * print out the IP address and the corresponding host name for each HTTP request <em>(slows things down considerably, due to <tt>nslookup</tt> call)</em>
55
+
56
+ > ntail -e '{ |line| puts [line.remote_address, line.to_host_name].join("\t") }' /var/log/nginx/access.log
57
+ 66.249.72.196 crawl-66-249-72-196.googlebot.com
58
+ 67.192.120.134 s402.pingdom.com
59
+ 75.31.109.144 adsl-75-31-109-144.dsl.irvnca.sbcglobal.net
60
+
61
+ == TODO
62
+
63
+ * implement a native <tt>"-f"</tt> option for ntail, similar to that of <tt>tail(1)</tt>
64
+ * implement a <tt>"-i"</tt> option ("ignore exceptions"/"continue processing") to keep going when handling a single line raises an exception
65
+
66
+ * make <tt>PROXY_IP_ADDRESSES</tt> configurable (from command line and/or rc file)
67
+ * make <tt>OFFICE_IP_ADDRESSES</tt> configurable (from command line and/or rc file)
68
+ * make <tt>KNOWN_SEARCH_BOTS</tt> configurable (from command line and/or rc file)
69
+ * make <tt>INTERNAL_REFERERS</tt> configurable (from command line and/or rc file)
70
+ * make <tt>AUTOMATED_REQUESTS</tt> configurable (from command line and/or rc file)
71
+ * make <tt>STATIC_REPOS</tt> configurable (from command line and/or rc file)
72
+
73
+ == Acknowledgements
74
+
75
+ ntail's parsing feature is inspired by an nginx log parser written by {Richard Taylor (moomerman)}[https://github.com/moomerman]
76
+
77
+ == Contributing to ntail
78
+
79
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
80
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
81
+ * Fork the project
82
+ * Start a feature/bugfix branch
83
+ * Commit and push until you are happy with your contribution
84
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
85
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
86
+
87
+ == Copyright
88
+
89
+ Copyright (c) 2010 Peter Vandenberk. See LICENSE.txt for further details.
90
+
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "ntail"
16
+ gem.homepage = "http://github.com/pvdb/ntail"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{A parsing tail(1)-like utility for nginx log files}
19
+ gem.description = %Q{A parsing tail(1)-like utility for nginx log files}
20
+ gem.email = "pvandenberk@mac.com"
21
+ gem.authors = ["Peter Vandenberk"]
22
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
23
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
+ # gem.add_runtime_dependency 'jabber4r', '> 0.1'
25
+ # gem.add_development_dependency 'rspec', '> 1.2.3'
26
+ end
27
+ Jeweler::RubygemsDotOrgTasks.new
28
+
29
+ require 'rake/testtask'
30
+ Rake::TestTask.new(:test) do |test|
31
+ test.libs << 'lib' << 'test'
32
+ test.pattern = 'test/**/test_*.rb'
33
+ test.verbose = true
34
+ end
35
+
36
+ require 'rcov/rcovtask'
37
+ Rcov::RcovTask.new do |test|
38
+ test.libs << 'test'
39
+ test.pattern = 'test/**/test_*.rb'
40
+ test.verbose = true
41
+ end
42
+
43
+ task :default => :test
44
+
45
+ require 'rake/rdoctask'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "ntail #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+
5
+ begin
6
+ require 'ntail'
7
+ rescue LoadError
8
+ require 'rubygems'
9
+ require 'ntail'
10
+ end
11
+
12
+ exit NginxTail::Application.run!
13
+
14
+ # That's all, Folks!
@@ -0,0 +1,5 @@
1
+ NTAIL_NAME = 'ntail'
2
+ NTAIL_VERSION = '0.0.1'
3
+
4
+ require 'ntail/log_line'
5
+ require 'ntail/application'
@@ -0,0 +1,115 @@
1
+ require 'ostruct'
2
+ require 'optparse'
3
+
4
+ module NginxTail
5
+ class Application
6
+
7
+ def self.options
8
+ # application options from the command line
9
+ @@options ||= OpenStruct.new
10
+ end
11
+
12
+ def self.ntail_options
13
+ # shamelessly copied from lib/rake.rb (rake gem)
14
+ [
15
+ ['--verbose', '--v', "Run verbosely (log messages to STDERR).",
16
+ lambda { |value|
17
+ self.options.verbose = true
18
+ }
19
+ ],
20
+ ['--version', '-V', "Display the program version.",
21
+ lambda { |value|
22
+ puts "#{NTAIL_NAME}, version #{NTAIL_VERSION}"
23
+ self.options.running = false
24
+ }
25
+ ],
26
+ ['--filter', '-f CODE', "Ruby code block for filtering (parsed) lines - needs to return true or false.",
27
+ lambda { |value|
28
+ self.options.filter = eval "Proc.new #{value}"
29
+ }
30
+ ],
31
+ ['--execute', '-e CODE', "Ruby code block for processing each (parsed) line.",
32
+ lambda { |value|
33
+ self.options.code = eval "Proc.new #{value}"
34
+ }
35
+ ],
36
+ ]
37
+ end
38
+
39
+ def self.parse_options
40
+
41
+ # application defaults...
42
+ self.options.running = true
43
+ self.options.exit = 0
44
+
45
+ OptionParser.new do |opts|
46
+ opts.banner = "ntail {options} {file(s)} ..."
47
+ opts.separator ""
48
+ opts.separator "Options are ..."
49
+
50
+ opts.on_tail("-h", "--help", "-H", "Display this help message.") do
51
+ puts opts
52
+ self.options.running = false
53
+ end
54
+
55
+ self.ntail_options.each { |args| opts.on(*args) }
56
+ end.parse!
57
+ end
58
+
59
+ def self.run!
60
+
61
+ self.parse_options
62
+
63
+ ['TERM', 'INT'].each do |signal|
64
+ Signal.trap(signal) do
65
+ self.options.running = false ; puts
66
+ end
67
+ end
68
+
69
+ lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
70
+
71
+ while self.options.running and ARGF.gets
72
+ raw_line = $_.chomp ; lines_read += 1
73
+ begin
74
+ log_line = NginxTail::LogLine.new(raw_line)
75
+ if log_line.parsable
76
+ parsable_lines += 1
77
+ if !self.options.filter || self.options.filter.call(log_line)
78
+ lines_processed += 1
79
+ if self.options.code
80
+ self.options.code.call(log_line)
81
+ else
82
+ puts log_line
83
+ end
84
+ else
85
+ lines_ignored += 1
86
+ if self.options.verbose
87
+ $stderr.puts "[WARNING] ignoring line ##{lines_read}"
88
+ end
89
+ end
90
+ else
91
+ unparsable_lines += 1
92
+ if self.options.verbose
93
+ $stderr.puts "[ERROR] cannot parse '#{raw_line}'"
94
+ end
95
+ end
96
+ rescue
97
+ $stderr.puts "[ERROR] processing line #{lines_read} resulted in #{$!.message}"
98
+ $stderr.puts "[ERROR] " + raw_line
99
+ self.options.exit = -1
100
+ self.options.running = false
101
+ end
102
+ end
103
+
104
+ if self.options.verbose
105
+ $stderr.puts "[INFO] read #{lines_read} lines"
106
+ $stderr.puts "[INFO] #{parsable_lines} parsable lines, #{unparsable_lines} unparsable lines"
107
+ $stderr.puts "[INFO] processed #{lines_processed} lines, ignored #{lines_ignored} lines"
108
+ end
109
+
110
+ return self.options.exit
111
+
112
+ end # def run
113
+
114
+ end
115
+ end
@@ -0,0 +1,375 @@
1
+ require 'socket'
2
+ require 'net/http'
3
+
4
+ require 'rubygems'
5
+ require 'rainbow'
6
+ require 'user-agent'
7
+
8
begin
  require 'geoip'
rescue LoadError
  # NOOP (optional dependency)
  # LoadError has to be named explicitly: it is a ScriptError, not a
  # StandardError, so a bare `rescue` would let a missing gem abort loading.
end
13
+
14
+ module NginxTail
15
+ class LogLine
16
+
17
+ attr_accessor :raw_line
18
+ attr_accessor :parsable
19
+
20
+ attr_accessor :remote_address
21
+ attr_accessor :remote_user
22
+ attr_accessor :time_local
23
+ attr_accessor :request
24
+ attr_accessor :status
25
+ attr_accessor :body_bytes_sent
26
+ attr_accessor :http_referer
27
+ attr_accessor :http_user_agent
28
+ attr_accessor :proxy_addresses
29
+
30
+ attr_accessor :http_method
31
+ attr_accessor :uri
32
+ attr_accessor :http_version
33
+
34
+ #
35
+ # http://wiki.nginx.org/NginxHttpLogModule#log_format - we currently only support the default "combined" log format...
36
+ #
37
+
38
+ NGINX_LOG_PATTERN = Regexp.compile(/\A(\S+) - (\S+) \[([^\]]+)\] "([^"]+)" (\S+) (\S+) "([^"]*?)" "([^"]*?)"( "([^"]*?)")?\Z/)
39
+ NGINX_REQUEST_PATTERN = Regexp.compile(/\A(\S+) (.*?) (\S+)\Z/)
40
+ NGINX_PROXY_PATTERN = Regexp.compile(/\A "([^"]*)"\Z/)
41
+
42
+ def initialize(line)
43
+ @parsable = if NGINX_LOG_PATTERN.match(@raw_line = line)
44
+ @remote_address, @remote_user, @time_local, @request, @status, @body_bytes_sent, @http_referer, @http_user_agent, @proxy_addresses = $~.captures
45
+ if NGINX_REQUEST_PATTERN.match(@request)
46
+ @http_method, @uri, @http_version = $~.captures
47
+ end
48
+ if @proxy_addresses and NGINX_PROXY_PATTERN.match(@proxy_addresses)
49
+ @proxy_addresses = $~.captures.first.split(/, /)
50
+ end
51
+ true
52
+ else
53
+ false
54
+ end
55
+ end
56
+
57
+ # for now, until we make it fancier...
58
+ def method_missing(method, *params)
59
+ raw_line.send method, *params
60
+ end
61
+
62
+ def to_s()
63
+ # simple but boring:
64
+ # raw_line.to_s
65
+ color = if redirect_status?
66
+ :yellow
67
+ elsif !success_status?
68
+ :red
69
+ else
70
+ :default
71
+ end
72
+ "%#{Sickill::Rainbow.enabled ? 15 + 9 : 15}s - %s - %s - %s" % [
73
+ remote_address.foreground(color),
74
+ status.foreground(color),
75
+ uri.foreground(color),
76
+ to_agent_s.foreground(color)
77
+ ]
78
+ end
79
+
80
+ COMPONENTS = [
81
+
82
+ :remote_address,
83
+ :remote_user,
84
+ :time_local,
85
+ :request,
86
+ :status,
87
+ :body_bytes_sent,
88
+ :http_referer,
89
+ :http_user_agent,
90
+ :proxy_addresses,
91
+
92
+ ]
93
+
94
+ SUBCOMPONENTS = [
95
+
96
+ :http_method,
97
+ :uri,
98
+ :http_version,
99
+
100
+ ]
101
+
102
+ CONVERSIONS = [
103
+
104
+ :to_date,
105
+ :to_agent,
106
+
107
+ :to_host_name,
108
+ :to_country,
109
+ :to_city,
110
+
111
+ ]
112
+
113
+ def self.log_subcomponent?(subcomponent) SUBCOMPONENTS.include?(subcomponent) ; end # TODO replace with some clever meta-programming...
114
+ def self.log_component?(component) COMPONENTS.include?(component) ; end # TODO replace with some clever meta-programming...
115
+ def self.log_conversion?(conversion) CONVERSIONS.include?(conversion) ; end # TODO replace with some clever meta-programming...
116
+ def self.log_directive?(directive) (directive == :full) or log_conversion?(directive) or log_component?(directive) or log_subcomponent?(directive) ; end
117
+
118
+ #
119
+ # extraction filters for log line components
120
+ #
121
+
122
+ def self.regexp_for_remote_address(remote_address)
123
+ Regexp.compile(/^(#{remote_address}) /)
124
+ end
125
+
126
+ def self.regexp_for_request(request)
127
+ Regexp.compile(/^([^"]+) "([^"]*#{request}[^"]*)" /)
128
+ end
129
+
130
+ def self.regexp_for_status(status)
131
+ Regexp.compile(/ "([^"]+)" (#{status}) /)
132
+ end
133
+
134
+ def self.regexp_for_http_referer(http_referer)
135
+ Regexp.compile(/" .* "([^"]*#{http_referer}[^"]*)" "/)
136
+ end
137
+
138
+ def self.regexp_for_http_user_agent(http_user_agent)
139
+ Regexp.compile(/ "([^"]*#{http_user_agent}[^"]*)"$/)
140
+ end
141
+
142
+ #
143
+ # validation of log line components
144
+ #
145
+
146
+ def self.valid_status?(status)
147
+ if /\A(\d{1,3})\Z/ =~ status
148
+ return $~.captures.all? { |i| 100 <= i.to_i and i.to_i < 600 }
149
+ end
150
+ return false
151
+ end
152
+
153
+ def self.valid_v4?(addr)
154
+ if /\A(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\Z/ =~ addr
155
+ return $~.captures.all? {|i| i.to_i < 256}
156
+ end
157
+ return false
158
+ end
159
+
160
+ def self.valid_request?(request) true ; end
161
+ def self.valid_referer?(referer) true ; end
162
+ def self.valid_user_agent?(user_agent) true ; end
163
+
164
+ #
165
+ # conversion of log line components
166
+ #
167
+
168
+ # >> DateTime.strptime("13/Apr/2010:04:45:51 +0100", '%d/%b/%Y:%T %z').to_s
169
+ # => "2010-04-13T04:45:51+01:00"
170
+ # >> DateTime.strptime("13/Apr/2010:04:45:51 +0100", '%d/%b/%Y:%H:%M:%S %z').to_s
171
+ # => "2010-04-13T04:45:51+01:00"
172
+ # >> _
173
+
174
+ def to_date() DateTime.strptime(self.time_local, '%d/%b/%Y:%T %z') ; end
175
+
176
+ class SearchBot < Agent
177
+ attr_accessor :name
178
+ attr_accessor :os
179
+ def initialize(string)
180
+ super string
181
+ @name = self.class.name_for_user_agent(string)
182
+ @os = self.class.os_for_user_agent(string)
183
+ end
184
+ def self.name_for_user_agent string
185
+ case string
186
+ when GOOGLE_BOT then "googlebot"
187
+ when MSN_BOT then "msnbot"
188
+ when YAHOO_BOT then "yahoo_slurp"
189
+ when ALEXA_BOT then "ia_archiver"
190
+ when PINGDOM_BOT then "pingdom_bot"
191
+ when YANDEX_BOT then "yandex_bot"
192
+ else super(string)
193
+ end
194
+ end
195
+ def self.os_for_user_agent string
196
+ case string
197
+ when GOOGLE_BOT then "google.com"
198
+ when MSN_BOT then "msn.com"
199
+ when YAHOO_BOT then "yahoo.com"
200
+ when ALEXA_BOT then "alexa.com"
201
+ when PINGDOM_BOT then "pingdom.com"
202
+ when YANDEX_BOT then "yandex.com"
203
+ else super(string)
204
+ end
205
+ end
206
+ end
207
+
208
+ def to_agent()
209
+ if known_search_bot?
210
+ SearchBot.new(self.http_user_agent)
211
+ else
212
+ Agent.new(self.http_user_agent)
213
+ end
214
+ end
215
+
216
+ def to_agent_s()
217
+ agent = self.to_agent ; "(%s, %s)" % [agent.name, agent.os]
218
+ end
219
+
220
+ def to_host_name()
221
+ Socket::getaddrinfo(self.remote_address,nil)[0][2]
222
+ end
223
+
224
+ if defined? GeoIP # ie. if the optional GeoIP gem is installed
225
+
226
+ if File.exists?('/usr/share/GeoIP/GeoIP.dat')
227
+ def to_country()
228
+ record = GeoIP.new('/usr/share/GeoIP/GeoIP.dat').country(self.remote_address) ; record ? record[5] : 'N/A'
229
+ end
230
+ end
231
+
232
+ if File.exists?('/usr/share/GeoIP/GeoIPCity.dat')
233
+ def to_city()
234
+ record = GeoIP.new('/usr/share/GeoIP/GeoIPCity.dat').city(self.remote_address) ; record ? record[7] : 'N/A'
235
+ end
236
+ end
237
+
238
+ end
239
+
240
+ #
241
+ # downstream proxy servers
242
+ #
243
+
244
+ PROXY_IP_ADDRESSES = %w{
245
+ 192.168.0.2
246
+ 192.168.0.3
247
+ 192.168.0.4
248
+ }
249
+
250
+ def self.proxy_ip_address?(remote_address) PROXY_IP_ADDRESSES.include?(remote_address) ; end
251
+ def proxy_ip_address?() self.class.proxy_ip_address?(self.remote_address) ; end
252
+
253
+ #
254
+ # known IP addresses, for filtering purposes
255
+ #
256
+
257
+ OFFICE_IP_ADDRESSES = %w{
258
+ }
259
+
260
+ def self.office_ip_address?(remote_address) OFFICE_IP_ADDRESSES.include?(remote_address) ; end
261
+ def office_ip_address?() self.class.office_ip_address?(self.remote_address) ; end
262
+
263
+ #
264
+ # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
265
+ # Googlebot-Image/1.0
266
+ # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
267
+ # msnbot/2.0b (+http://search.msn.com/msnbot.htm).
268
+ # msnbot/2.0b (+http://search.msn.com/msnbot.htm)._
269
+ # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
270
+ # Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)
271
+ # ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)
272
+ # Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
273
+ #
274
+
275
+ KNOWN_SEARCH_BOTS = [
276
+ GOOGLE_BOT = Regexp.compile('Googlebot.*\/'),
277
+ MSN_BOT = Regexp.compile('msnbot\/'),
278
+ YAHOO_BOT = Regexp.compile('Yahoo! Slurp\/?'),
279
+ PINGDOM_BOT = Regexp.compile('Pingdom.com_bot_version_'),
280
+ ALEXA_BOT = Regexp.compile('ia_archiver'),
281
+ YANDEX_BOT = Regexp.compile('YandexBot\/'),
282
+ nil
283
+ ].compact!
284
+
285
+ def self.known_search_bot?(user_agent) !KNOWN_SEARCH_BOTS.detect { |bot| bot.match(user_agent) }.nil? end
286
+ def known_search_bot?() self.class.known_search_bot?(self.http_user_agent) ; end
287
+
288
+ #
289
+ # mainly to easily identify external referers, for filtering purposes
290
+ #
291
+
292
+ INTERNAL_REFERERS = [
293
+ Regexp.compile('^http://(www\.)?MY_WEBSITE_NAME\.com'),
294
+ Regexp.compile('^-$'),
295
+ ]
296
+
297
+ def self.internal_referer?(http_referer) !INTERNAL_REFERERS.detect { |referer| referer.match(http_referer) }.nil? end
298
+ def internal_referer?() self.class.internal_referer?(self.http_referer) ; end
299
+
300
+ def self.external_referer?(http_referer) !self.internal_referer?(http_referer) ; end
301
+ def external_referer?() self.class.external_referer?(self.http_referer) ; end
302
+
303
+ def self.authenticated_user?(remote_user) remote_user and remote_user != "-" ; end
304
+ def authenticated_user?() self.class.authenticated_user?(self.remote_user) ; end
305
+
306
+ #
307
+ # "GET /xd_receiver.html HTTP/1.1"
308
+ # "GET /crossdomain.xml HTTP/1.1"
309
+ # "GET /favicon.ico HTTP/1.1"
310
+ # "GET /robots.txt HTTP/1.0"
311
+ #
312
+
313
+ AUTOMATED_REQUESTS = [
314
+ Regexp.compile('^[A-Z]+ \/xd_receiver.html'),
315
+ Regexp.compile('^[A-Z]+ \/crossdomain.xml'),
316
+ Regexp.compile('^[A-Z]+ \/favicon.ico'),
317
+ Regexp.compile('^[A-Z]+ \/robots.txt'),
318
+ nil
319
+ ].compact!
320
+
321
+ def self.automated_request?(request) !AUTOMATED_REQUESTS.detect { |automated_request_regexp| request.match(automated_request_regexp) }.nil? end
322
+ def automated_request?() self.class.automated_request?(self.request) ; end
323
+
324
+ #
325
+ # subdirectories of the "public" folder in the web root,
326
+ # which - in a typical Rails setup - are served by nginx
327
+ #
328
+
329
+ STATIC_REPOS = %w{
330
+ flash
331
+ html
332
+ images
333
+ javascripts
334
+ movies
335
+ newsletters
336
+ pictures
337
+ stylesheets
338
+ xml
339
+ }
340
+
341
+ STATIC_URIS = STATIC_REPOS.map { |repo| Regexp.compile("^\/#{repo}\/") }
342
+
343
+ def self.static_uri?(uri) !STATIC_URIS.detect { |static_uri_regexp| uri.match(static_uri_regexp) }.nil? end
344
+ def static_uri?() self.class.static_uri?(self.uri); end
345
+
346
+ STATIC_REQUESTS = STATIC_REPOS.map { |repo| Regexp.compile("^[A-Z]+ \/#{repo}\/") }
347
+
348
+ def self.static_request?(request) !STATIC_REQUESTS.detect { |static_request_regexp| request.match(static_request_regexp) }.nil? end
349
+ def static_request?() self.class.static_request?(self.request) ; end
350
+
351
+ NGINX_MAGIC_STATUS = '499' # ex-standard HTTP response code specific to nginx, in addition to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
352
+ UNPROCESSABLE_ENTITY = '422' # not supported by 'net/http' (Net::HTTPResponse::CODE_TO_OBJ["422"] == nil), see also: http://www.ruby-forum.com/topic/98002
353
+
354
+ # Informational 1xx
355
+ def self.information_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPInformation ; end
356
+ def information_status?() self.class.information_status?(self.status) ; end
357
+
358
+ # Successful 2xx
359
+ def self.success_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPSuccess ; end
360
+ def success_status?() self.class.success_status?(self.status) ; end
361
+
362
+ # Redirection 3xx
363
+ def self.redirect_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPRedirection ; end
364
+ def redirect_status?() self.class.redirect_status?(self.status) ; end
365
+
366
+ # Client Error 4xx
367
+ def self.client_error_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPClientError ; end
368
+ def client_error_status?() self.class.client_error_status?(self.status) ; end
369
+
370
+ # Internal Server Error 5xx
371
+ def self.server_error_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPServerError ; end
372
+ def server_error_status?() self.class.server_error_status?(self.status) ; end
373
+
374
+ end # class LogLine
375
+ end # module NginxTail
@@ -0,0 +1,75 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{ntail}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Peter Vandenberk"]
12
+ s.date = %q{2010-12-20}
13
+ s.default_executable = %q{ntail}
14
+ s.description = %q{A parsing tail(1)-like utility for nginx log files}
15
+ s.email = %q{pvandenberk@mac.com}
16
+ s.executables = ["ntail"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE.txt",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ "Gemfile",
24
+ "Gemfile.lock",
25
+ "LICENSE.txt",
26
+ "README.rdoc",
27
+ "Rakefile",
28
+ "VERSION",
29
+ "bin/ntail",
30
+ "lib/ntail.rb",
31
+ "lib/ntail/application.rb",
32
+ "lib/ntail/log_line.rb",
33
+ "ntail.gemspec",
34
+ "test/helper.rb",
35
+ "test/test_ntail.rb"
36
+ ]
37
+ s.homepage = %q{http://github.com/pvdb/ntail}
38
+ s.licenses = ["MIT"]
39
+ s.require_paths = ["lib"]
40
+ s.rubygems_version = %q{1.3.7}
41
+ s.summary = %q{A parsing tail(1)-like utility for nginx log files}
42
+ s.test_files = [
43
+ "test/helper.rb",
44
+ "test/test_ntail.rb"
45
+ ]
46
+
47
+ if s.respond_to? :specification_version then
48
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
49
+ s.specification_version = 3
50
+
51
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
52
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
53
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
54
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5.1"])
55
+ s.add_development_dependency(%q<rcov>, [">= 0"])
56
+ s.add_development_dependency(%q<rainbow>, [">= 0"])
57
+ s.add_development_dependency(%q<user-agent>, [">= 0"])
58
+ else
59
+ s.add_dependency(%q<shoulda>, [">= 0"])
60
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
61
+ s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
62
+ s.add_dependency(%q<rcov>, [">= 0"])
63
+ s.add_dependency(%q<rainbow>, [">= 0"])
64
+ s.add_dependency(%q<user-agent>, [">= 0"])
65
+ end
66
+ else
67
+ s.add_dependency(%q<shoulda>, [">= 0"])
68
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
69
+ s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
70
+ s.add_dependency(%q<rcov>, [">= 0"])
71
+ s.add_dependency(%q<rainbow>, [">= 0"])
72
+ s.add_dependency(%q<user-agent>, [">= 0"])
73
+ end
74
+ end
75
+
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'ntail'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestNtail < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,169 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ntail
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Peter Vandenberk
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-20 00:00:00 +00:00
19
+ default_executable: ntail
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ type: :development
23
+ prerelease: false
24
+ name: shoulda
25
+ version_requirements: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ requirement: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ type: :development
37
+ prerelease: false
38
+ name: bundler
39
+ version_requirements: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ hash: 23
45
+ segments:
46
+ - 1
47
+ - 0
48
+ - 0
49
+ version: 1.0.0
50
+ requirement: *id002
51
+ - !ruby/object:Gem::Dependency
52
+ type: :development
53
+ prerelease: false
54
+ name: jeweler
55
+ version_requirements: &id003 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ~>
59
+ - !ruby/object:Gem::Version
60
+ hash: 1
61
+ segments:
62
+ - 1
63
+ - 5
64
+ - 1
65
+ version: 1.5.1
66
+ requirement: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ type: :development
69
+ prerelease: false
70
+ name: rcov
71
+ version_requirements: &id004 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
80
+ requirement: *id004
81
+ - !ruby/object:Gem::Dependency
82
+ type: :development
83
+ prerelease: false
84
+ name: rainbow
85
+ version_requirements: &id005 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ hash: 3
91
+ segments:
92
+ - 0
93
+ version: "0"
94
+ requirement: *id005
95
+ - !ruby/object:Gem::Dependency
96
+ type: :development
97
+ prerelease: false
98
+ name: user-agent
99
+ version_requirements: &id006 !ruby/object:Gem::Requirement
100
+ none: false
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ hash: 3
105
+ segments:
106
+ - 0
107
+ version: "0"
108
+ requirement: *id006
109
+ description: A parsing tail(1)-like utility for nginx log files
110
+ email: pvandenberk@mac.com
111
+ executables:
112
+ - ntail
113
+ extensions: []
114
+
115
+ extra_rdoc_files:
116
+ - LICENSE.txt
117
+ - README.rdoc
118
+ files:
119
+ - .document
120
+ - Gemfile
121
+ - Gemfile.lock
122
+ - LICENSE.txt
123
+ - README.rdoc
124
+ - Rakefile
125
+ - VERSION
126
+ - bin/ntail
127
+ - lib/ntail.rb
128
+ - lib/ntail/application.rb
129
+ - lib/ntail/log_line.rb
130
+ - ntail.gemspec
131
+ - test/helper.rb
132
+ - test/test_ntail.rb
133
+ has_rdoc: true
134
+ homepage: http://github.com/pvdb/ntail
135
+ licenses:
136
+ - MIT
137
+ post_install_message:
138
+ rdoc_options: []
139
+
140
+ require_paths:
141
+ - lib
142
+ required_ruby_version: !ruby/object:Gem::Requirement
143
+ none: false
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ hash: 3
148
+ segments:
149
+ - 0
150
+ version: "0"
151
+ required_rubygems_version: !ruby/object:Gem::Requirement
152
+ none: false
153
+ requirements:
154
+ - - ">="
155
+ - !ruby/object:Gem::Version
156
+ hash: 3
157
+ segments:
158
+ - 0
159
+ version: "0"
160
+ requirements: []
161
+
162
+ rubyforge_project:
163
+ rubygems_version: 1.3.7
164
+ signing_key:
165
+ specification_version: 3
166
+ summary: A parsing tail(1)-like utility for nginx log files
167
+ test_files:
168
+ - test/helper.rb
169
+ - test/test_ntail.rb