ntail 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.rdoc +1 -1
- data/VERSION +1 -1
- data/lib/ntail.rb +24 -1
- data/lib/ntail/application.rb +60 -28
- data/lib/ntail/body_bytes_sent.rb +14 -0
- data/lib/ntail/http_method.rb +34 -0
- data/lib/ntail/http_referer.rb +81 -0
- data/lib/ntail/http_user_agent.rb +122 -0
- data/lib/ntail/http_version.rb +22 -0
- data/lib/ntail/known_ip_addresses.rb +44 -0
- data/lib/ntail/local_ip_addresses.rb +44 -0
- data/lib/ntail/log_line.rb +75 -222
- data/lib/ntail/proxy_addresses.rb +14 -0
- data/lib/ntail/remote_addr.rb +56 -0
- data/lib/ntail/remote_user.rb +63 -0
- data/lib/ntail/request.rb +33 -0
- data/lib/ntail/status.rb +68 -0
- data/lib/ntail/time_local.rb +38 -0
- data/lib/ntail/uri.rb +22 -0
- data/ntail.gemspec +41 -2
- data/test/helper.rb +73 -0
- data/test/ntail/test_http_method.rb +50 -0
- data/test/ntail/test_http_referer.rb +77 -0
- data/test/ntail/test_http_user_agent.rb +29 -0
- data/test/ntail/test_known_ip_addresses.rb +45 -0
- data/test/ntail/test_local_ip_addresses.rb +45 -0
- data/test/ntail/test_log_line.rb +51 -0
- data/test/ntail/test_remote_addr.rb +38 -0
- data/test/ntail/test_remote_user.rb +65 -0
- data/test/ntail/test_request.rb +26 -0
- data/test/ntail/test_status.rb +26 -0
- data/test/ntail/test_time_local.rb +30 -0
- data/test/test_ntail.rb +9 -2
- metadata +59 -9
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.rdoc
CHANGED
@@ -49,7 +49,7 @@ A tail(1)-like utility for nginx log files that supports parsing, filtering and
|
|
49
49
|
|
50
50
|
* print out GeoIP country and city information for each HTTP request <em>(depends on the optional <tt>geoip</tt> gem)</em>
|
51
51
|
|
52
|
-
> ntail -e '{ |line| puts [line.
|
52
|
+
> ntail -e '{ |line| puts [line.to_country_s, line.to_city_s].join("\t") }' /var/log/nginx/access.log
|
53
53
|
United States Los Angeles
|
54
54
|
United States Houston
|
55
55
|
Germany Berlin
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.7
|
data/lib/ntail.rb
CHANGED
@@ -1,5 +1,28 @@
|
|
1
1
|
# Program name, printed by the --version option.
NTAIL_NAME = 'ntail'
# Program version, printed by the --version option.
# Keep this in sync with the VERSION file when cutting a release.
NTAIL_VERSION = '0.0.7'
|
3
3
|
|
4
|
+
# so-called components...
|
5
|
+
require 'ntail/remote_addr'
|
6
|
+
require 'ntail/remote_user'
|
7
|
+
require 'ntail/time_local'
|
8
|
+
require 'ntail/request'
|
9
|
+
require 'ntail/status'
|
10
|
+
require 'ntail/body_bytes_sent'
|
11
|
+
require 'ntail/http_referer'
|
12
|
+
require 'ntail/http_user_agent'
|
13
|
+
require 'ntail/proxy_addresses'
|
14
|
+
|
15
|
+
# so-called sub-components...
|
16
|
+
require 'ntail/http_method'
|
17
|
+
require 'ntail/uri'
|
18
|
+
require 'ntail/http_version'
|
19
|
+
|
20
|
+
# additional utility functions...
|
21
|
+
require 'ntail/known_ip_addresses'
|
22
|
+
require 'ntail/local_ip_addresses'
|
23
|
+
|
24
|
+
# the core classes...
|
4
25
|
require 'ntail/log_line'
|
5
|
-
require 'ntail/application'
|
26
|
+
require 'ntail/application'
|
27
|
+
|
28
|
+
# That's all, Folks!
|
data/lib/ntail/application.rb
CHANGED
@@ -12,17 +12,32 @@ module NginxTail
|
|
12
12
|
def self.ntail_options
|
13
13
|
# shamelessly copied from lib/rake.rb (rake gem)
|
14
14
|
[
|
15
|
-
['--verbose', '
|
15
|
+
['--verbose', '-v', "Run verbosely (log messages to STDERR).",
|
16
16
|
lambda { |value|
|
17
17
|
self.options.verbose = true
|
18
18
|
}
|
19
19
|
],
|
20
|
+
['--dry-run', '-n', "Dry-run: process files, but don't actually parse the lines",
|
21
|
+
lambda { |value|
|
22
|
+
self.options.dry_run = true
|
23
|
+
}
|
24
|
+
],
|
25
|
+
['--parse-only', '-p', "Parse only: parse all lines, but don't actually process them",
|
26
|
+
lambda { |value|
|
27
|
+
self.options.parse_only = true
|
28
|
+
}
|
29
|
+
],
|
20
30
|
['--version', '-V', "Display the program version.",
|
21
31
|
lambda { |value|
|
22
32
|
puts "#{NTAIL_NAME}, version #{NTAIL_VERSION}"
|
23
33
|
self.options.running = false
|
24
34
|
}
|
25
35
|
],
|
36
|
+
['--line-number', '-l LINE_NUMBER', "Only process the line with the given line number",
|
37
|
+
lambda { |value|
|
38
|
+
self.options.line_number = value.to_i
|
39
|
+
}
|
40
|
+
],
|
26
41
|
['--filter', '-f CODE', "Ruby code block for filtering (parsed) lines - needs to return true or false.",
|
27
42
|
lambda { |value|
|
28
43
|
self.options.filter = eval "Proc.new #{value}"
|
@@ -39,6 +54,7 @@ module NginxTail
|
|
39
54
|
def self.parse_options
|
40
55
|
|
41
56
|
# application defaults...
|
57
|
+
self.options.interrupted = false
|
42
58
|
self.options.running = true
|
43
59
|
self.options.exit = 0
|
44
60
|
|
@@ -62,47 +78,63 @@ module NginxTail
|
|
62
78
|
|
63
79
|
['TERM', 'INT'].each do |signal|
|
64
80
|
Signal.trap(signal) do
|
65
|
-
self.options.running = false ;
|
81
|
+
self.options.running = false ; self.options.interrupted = true
|
82
|
+
$stdin.close if ARGF.file == $stdin # ie. reading from STDIN
|
66
83
|
end
|
67
84
|
end
|
68
85
|
|
69
|
-
lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
|
86
|
+
files_read = lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
|
70
87
|
|
71
88
|
while self.options.running and ARGF.gets
|
89
|
+
if ARGF.file.lineno == 1
|
90
|
+
files_read += 1
|
91
|
+
if self.options.verbose
|
92
|
+
$stderr.puts "[INFO] now processing file #{ARGF.filename}"
|
93
|
+
end
|
94
|
+
end
|
72
95
|
raw_line = $_.chomp ; lines_read += 1
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
96
|
+
unless self.options.dry_run
|
97
|
+
if !self.options.line_number or self.options.line_number == ARGF.lineno
|
98
|
+
begin
|
99
|
+
log_line = NginxTail::LogLine.new(raw_line)
|
100
|
+
if log_line.parsable
|
101
|
+
parsable_lines += 1
|
102
|
+
unless self.options.parse_only
|
103
|
+
if !self.options.filter || self.options.filter.call(log_line)
|
104
|
+
lines_processed += 1
|
105
|
+
if self.options.code
|
106
|
+
self.options.code.call(log_line)
|
107
|
+
else
|
108
|
+
puts log_line
|
109
|
+
end
|
110
|
+
else
|
111
|
+
lines_ignored += 1
|
112
|
+
if self.options.verbose
|
113
|
+
$stderr.puts "[WARNING] ignoring line ##{lines_read}"
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
81
117
|
else
|
82
|
-
|
118
|
+
unparsable_lines += 1
|
119
|
+
if self.options.verbose
|
120
|
+
$stderr.puts "[ERROR] cannot parse '#{raw_line}'"
|
121
|
+
end
|
83
122
|
end
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
unparsable_lines += 1
|
92
|
-
if self.options.verbose
|
93
|
-
$stderr.puts "[ERROR] cannot parse '#{raw_line}'"
|
123
|
+
rescue
|
124
|
+
$stderr.puts "[ERROR] processing line #{ARGF.file.lineno} of file #{ARGF.filename} resulted in #{$!.message}"
|
125
|
+
$stderr.puts "[ERROR] " + raw_line
|
126
|
+
self.options.exit = -1
|
127
|
+
self.options.running = false
|
128
|
+
raise $! # TODO if the "re-raise exceptions" option has been set...
|
94
129
|
end
|
95
130
|
end
|
96
|
-
rescue
|
97
|
-
$stderr.puts "[ERROR] processing line #{lines_read} resulted in #{$!.message}"
|
98
|
-
$stderr.puts "[ERROR] " + raw_line
|
99
|
-
self.options.exit = -1
|
100
|
-
self.options.running = false
|
101
131
|
end
|
102
132
|
end
|
103
133
|
|
104
134
|
if self.options.verbose
|
105
|
-
$stderr.puts
|
135
|
+
$stderr.puts if self.options.interrupted
|
136
|
+
$stderr.print "[INFO] read #{lines_read} lines in #{files_read} files"
|
137
|
+
$stderr.print " (interrupted)" if self.options.interrupted ; $stderr.puts
|
106
138
|
$stderr.puts "[INFO] #{parsable_lines} parsable lines, #{unparsable_lines} unparsable lines"
|
107
139
|
$stderr.puts "[INFO] processed #{lines_processed} lines, ignored #{lines_ignored} lines"
|
108
140
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module NginxTail
  #
  # Mixin for classes that expose a +body_bytes_sent+ instance method.
  #
  # The module currently adds no methods of its own; including it only
  # verifies that the host class provides the +body_bytes_sent+ reader.
  #
  module BodyBytesSent

    # Hook run when the module is included.
    #
    # Raises a RuntimeError when +base+ does not implement +body_bytes_sent+.
    def self.included(base) # :nodoc:
      base.class_eval do

        # this ensures the module methods actually make sense...
        #
        # Module#method_defined? is used instead of instance_methods.include?
        # because instance_methods returns Strings on Ruby 1.8 but Symbols on
        # Ruby 1.9+, which made a String comparison fail on every include.
        unless base.method_defined?(:body_bytes_sent)
          raise "Class #{base.name} should implement instance method 'body_bytes_sent'"
        end

      end
    end

  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module NginxTail
  #
  # Mixin that adds HTTP method formatting to classes exposing an
  # +http_method+ instance method.
  #
  module HttpMethod

    #
    # http://www.ietf.org/rfc/rfc2616.txt - "section 5.1.1 Method"
    #
    # OPTIONS GET HEAD POST PUT DELETE TRACE CONNECT
    #

    # Hook run when the module is included: defines the class-level
    # formatter on +base+ and checks that +base+ implements +http_method+.
    #
    # Raises a RuntimeError when +base+ does not implement +http_method+.
    def self.included(base) # :nodoc:
      base.class_eval do

        # Returns the upper-cased HTTP method; POST, PUT and DELETE are
        # additionally passed through String#inverse when that is available.
        #
        # +http_method+ will be nil if $request == "-" (ie. "dodgy" HTTP
        # requests), in which case an empty string is returned.
        def self.to_http_method_s(http_method)
          # non-destructive upcase: the caller's string is left untouched
          # and frozen strings no longer raise
          method_name = (http_method || "").upcase
          case method_name
          when "POST", "PUT", "DELETE"
            # NOTE(review): String#inverse is not core Ruby; presumably it is
            # provided by the rainbow gem (cf. Sickill::Rainbow) - fall back
            # to the plain string when it has not been loaded
            method_name.respond_to?(:inverse) ? method_name.inverse : method_name
          else
            method_name
          end
        end

        # this ensures the module methods actually make sense...
        #
        # Module#method_defined? works whether instance_methods reports
        # Strings (Ruby 1.8) or Symbols (Ruby 1.9+).
        unless base.method_defined?(:http_method)
          raise "Class #{base.name} should implement instance method 'http_method'"
        end

      end
    end

    # Formats this object's own +http_method+ via the class-level formatter.
    def to_http_method_s
      self.class.to_http_method_s(self.http_method)
    end

  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module NginxTail
  #
  # Mixin that classifies and formats HTTP referers for classes exposing an
  # +http_referer+ instance method.
  #
  module HttpReferer

    #
    # to easily identify external referers, for filtering and formatting purposes
    #
    # e.g. Regexp.compile('^http(s)?://(www\.)?MY_WEBSITE_NAME\.com')
    #

    UNKNOWN_REFERER = "-".freeze # the 'default' nginx value for $http_referer variable

    # Hook run when the module is included: defines the class-level helpers
    # on +base+ and checks that +base+ implements +http_referer+.
    #
    # Raises a RuntimeError when +base+ does not implement +http_referer+.
    def self.included(base) # :nodoc:
      base.class_eval do

        # registered patterns for "internal" referers; (re)initialised on
        # every include
        @@internal_referers = []

        # mainly (solely?) for testing purposes...
        #
        # Empties the list of registered internal referers in place.
        def self.reset_internal_referers()
          @@internal_referers.clear
        end

        # mainly (solely?) for testing purposes...
        #
        # Returns a copy of the registered internal referers.
        def self.internal_referers()
          @@internal_referers.dup
        end

        # Registers +internal_referer+ (an object responding to +match+,
        # e.g. a Regexp), ignoring duplicates.
        #
        # Raises a RuntimeError when given the unknown referer ("-").
        def self.add_internal_referer(internal_referer)
          raise "Cannot add unknown HTTP referer" if self.unknown_referer? internal_referer
          (@@internal_referers << internal_referer).uniq!
        end

        # True when +http_referer+ is the nginx placeholder for "no referer".
        def self.unknown_referer?(http_referer)
          http_referer == UNKNOWN_REFERER
        end

        # True when +http_referer+ is known and matches a registered pattern.
        def self.internal_referer?(http_referer)
          !self.unknown_referer?(http_referer) &&
            @@internal_referers.any? { |referer| referer.match(http_referer) }
        end

        # True when +http_referer+ is known but matches no registered pattern.
        def self.external_referer?(http_referer)
          !self.unknown_referer?(http_referer) && !self.internal_referer?(http_referer)
        end

        # Returns the host part of +http_referer+; the referer itself is
        # returned when it is unknown, has no host, or cannot be parsed.
        def self.to_referer_s(http_referer)
          return http_referer if self.unknown_referer? http_referer
          # try to parse it as a URI, but with default value if un-parsable
          URI.parse(http_referer).host || http_referer
        rescue URI::InvalidURIError
          http_referer
        end

        # this ensures the module methods actually make sense...
        #
        # Module#method_defined? works whether instance_methods reports
        # Strings (Ruby 1.8) or Symbols (Ruby 1.9+).
        unless base.method_defined?(:http_referer)
          raise "Class #{base.name} should implement instance method 'http_referer'"
        end

      end
    end

    # True when this object's referer is the nginx "no referer" placeholder.
    def unknown_referer?
      self.class.unknown_referer?(self.http_referer)
    end

    # True when this object's referer matches a registered internal pattern.
    def internal_referer?
      self.class.internal_referer?(self.http_referer)
    end

    # True when this object's referer is known and not internal.
    def external_referer?
      self.class.external_referer?(self.http_referer)
    end

    # Short display form (host name where possible) of this object's referer.
    def to_referer_s
      self.class.to_referer_s(self.http_referer)
    end

  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'user-agent'
|
3
|
+
|
4
|
+
# Re-opens Agent to add a predicate that subclasses can override.
class Agent

  # A plain agent is not a search bot.
  def search_bot?
    false
  end

end

# An Agent whose user agent string matches one of the known crawler patterns.
class SearchBot < Agent

  # Every SearchBot instance is, by definition, a search bot.
  def search_bot?
    true
  end

  #
  # Sample user agent strings:
  #
  # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
  # Googlebot-Image/1.0
  # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
  # msnbot/2.0b (+http://search.msn.com/msnbot.htm).
  # msnbot/2.0b (+http://search.msn.com/msnbot.htm)._
  # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
  # Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)
  # ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)
  # Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
  #

  # Patterns recognising each supported crawler; each one is also exposed
  # as its own constant.
  KNOWN_SEARCH_BOTS = [
    GOOGLE_BOT  = Regexp.compile('Googlebot.*\/'),
    MSN_BOT     = Regexp.compile('msnbot\/'),
    YAHOO_BOT   = Regexp.compile('Yahoo! Slurp\/?'),
    PINGDOM_BOT = Regexp.compile('Pingdom.com_bot_version_'),
    ALEXA_BOT   = Regexp.compile('ia_archiver'),
    YANDEX_BOT  = Regexp.compile('YandexBot\/'),
  ]

  # True when +http_user_agent+ matches any of the known crawler patterns.
  def self.search_bot?(http_user_agent)
    KNOWN_SEARCH_BOTS.any? { |pattern| pattern.match(http_user_agent) }
  end

  attr_accessor :name, :os

  # Builds the agent via the superclass, then sets the name and os from the
  # class-level lookups below.
  def initialize(string)
    super(string)
    @name = self.class.name_for_user_agent(string)
    @os   = self.class.os_for_user_agent(string)
  end

  # Symbolic crawler name for +string+; defers to the superclass when no
  # crawler pattern matches.
  def self.name_for_user_agent(string)
    case string
    when GOOGLE_BOT
      :googlebot
    when MSN_BOT
      :msnbot
    when YAHOO_BOT
      :yahoo_slurp
    when ALEXA_BOT
      :ia_archiver
    when PINGDOM_BOT
      :pingdom_bot
    when YANDEX_BOT
      :yandex_bot
    else
      super(string)
    end
  end

  # Operator domain reported in place of an operating system for +string+;
  # defers to the superclass when no crawler pattern matches.
  def self.os_for_user_agent(string)
    case string
    when GOOGLE_BOT
      :"google.com"
    when MSN_BOT
      :"msn.com"
    when YAHOO_BOT
      :"yahoo.com"
    when ALEXA_BOT
      :"alexa.com"
    when PINGDOM_BOT
      :"pingdom.com"
    when YANDEX_BOT
      :"yandex.com"
    else
      super(string)
    end
  end

end
|
77
|
+
|
78
|
+
module NginxTail
|
79
|
+
module HttpUserAgent
|
80
|
+
|
81
|
+
def self.included(base) # :nodoc:
|
82
|
+
base.class_eval do
|
83
|
+
|
84
|
+
def self.search_bot?(http_user_agent)
|
85
|
+
SearchBot.search_bot?(http_user_agent)
|
86
|
+
end
|
87
|
+
|
88
|
+
def self.to_agent(http_user_agent)
|
89
|
+
if self.search_bot? http_user_agent
|
90
|
+
SearchBot.new(http_user_agent)
|
91
|
+
else
|
92
|
+
Agent.new(http_user_agent)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def self.to_agent_s(http_user_agent)
|
97
|
+
agent = self.to_agent http_user_agent
|
98
|
+
"(%s, %s)" % [agent.name, agent.os]
|
99
|
+
end
|
100
|
+
|
101
|
+
# this ensures the below module methods actually make sense...
|
102
|
+
raise "Class #{base.name} should implement instance method 'http_user_agent'" unless base.instance_methods.include? 'http_user_agent'
|
103
|
+
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def search_bot?
|
108
|
+
self.class.search_bot?(self.http_user_agent)
|
109
|
+
end
|
110
|
+
|
111
|
+
def to_agent
|
112
|
+
self.class.to_agent(self.http_user_agent)
|
113
|
+
end
|
114
|
+
|
115
|
+
def to_agent_s
|
116
|
+
self.class.to_agent_s(self.http_user_agent)
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
|