ntail 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.rdoc +1 -1
- data/VERSION +1 -1
- data/lib/ntail.rb +24 -1
- data/lib/ntail/application.rb +60 -28
- data/lib/ntail/body_bytes_sent.rb +14 -0
- data/lib/ntail/http_method.rb +34 -0
- data/lib/ntail/http_referer.rb +81 -0
- data/lib/ntail/http_user_agent.rb +122 -0
- data/lib/ntail/http_version.rb +22 -0
- data/lib/ntail/known_ip_addresses.rb +44 -0
- data/lib/ntail/local_ip_addresses.rb +44 -0
- data/lib/ntail/log_line.rb +75 -222
- data/lib/ntail/proxy_addresses.rb +14 -0
- data/lib/ntail/remote_addr.rb +56 -0
- data/lib/ntail/remote_user.rb +63 -0
- data/lib/ntail/request.rb +33 -0
- data/lib/ntail/status.rb +68 -0
- data/lib/ntail/time_local.rb +38 -0
- data/lib/ntail/uri.rb +22 -0
- data/ntail.gemspec +41 -2
- data/test/helper.rb +73 -0
- data/test/ntail/test_http_method.rb +50 -0
- data/test/ntail/test_http_referer.rb +77 -0
- data/test/ntail/test_http_user_agent.rb +29 -0
- data/test/ntail/test_known_ip_addresses.rb +45 -0
- data/test/ntail/test_local_ip_addresses.rb +45 -0
- data/test/ntail/test_log_line.rb +51 -0
- data/test/ntail/test_remote_addr.rb +38 -0
- data/test/ntail/test_remote_user.rb +65 -0
- data/test/ntail/test_request.rb +26 -0
- data/test/ntail/test_status.rb +26 -0
- data/test/ntail/test_time_local.rb +30 -0
- data/test/test_ntail.rb +9 -2
- metadata +59 -9
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.rdoc
CHANGED
@@ -49,7 +49,7 @@ A tail(1)-like utility for nginx log files that supports parsing, filtering and
|
|
49
49
|
|
50
50
|
* print out GeoIP country and city information for each HTTP request <em>(depends on the optional <tt>geoip</tt> gem)</em>
|
51
51
|
|
52
|
-
> ntail -e '{ |line| puts [line.
|
52
|
+
> ntail -e '{ |line| puts [line.to_country_s, line.to_city_s].join("\t") }' /var/log/nginx/access.log
|
53
53
|
United States Los Angeles
|
54
54
|
United States Houston
|
55
55
|
Germany Berlin
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.7
|
data/lib/ntail.rb
CHANGED
@@ -1,5 +1,28 @@
|
|
1
1
|
NTAIL_NAME = 'ntail'
|
2
2
|
NTAIL_VERSION = '0.0.1'
|
3
3
|
|
4
|
+
# so-called components...
|
5
|
+
require 'ntail/remote_addr'
|
6
|
+
require 'ntail/remote_user'
|
7
|
+
require 'ntail/time_local'
|
8
|
+
require 'ntail/request'
|
9
|
+
require 'ntail/status'
|
10
|
+
require 'ntail/body_bytes_sent'
|
11
|
+
require 'ntail/http_referer'
|
12
|
+
require 'ntail/http_user_agent'
|
13
|
+
require 'ntail/proxy_addresses'
|
14
|
+
|
15
|
+
# so-called sub-components...
|
16
|
+
require 'ntail/http_method'
|
17
|
+
require 'ntail/uri'
|
18
|
+
require 'ntail/http_version'
|
19
|
+
|
20
|
+
# additional utility functions...
|
21
|
+
require 'ntail/known_ip_addresses'
|
22
|
+
require 'ntail/local_ip_addresses'
|
23
|
+
|
24
|
+
# the core classes...
|
4
25
|
require 'ntail/log_line'
|
5
|
-
require 'ntail/application'
|
26
|
+
require 'ntail/application'
|
27
|
+
|
28
|
+
# That's all, Folks!
|
data/lib/ntail/application.rb
CHANGED
@@ -12,17 +12,32 @@ module NginxTail
|
|
12
12
|
def self.ntail_options
|
13
13
|
# shamelessly copied from lib/rake.rb (rake gem)
|
14
14
|
[
|
15
|
-
['--verbose', '
|
15
|
+
['--verbose', '-v', "Run verbosely (log messages to STDERR).",
|
16
16
|
lambda { |value|
|
17
17
|
self.options.verbose = true
|
18
18
|
}
|
19
19
|
],
|
20
|
+
['--dry-run', '-n', "Dry-run: process files, but don't actually parse the lines",
|
21
|
+
lambda { |value|
|
22
|
+
self.options.dry_run = true
|
23
|
+
}
|
24
|
+
],
|
25
|
+
['--parse-only', '-p', "Parse only: parse all lines, but don't actually process them",
|
26
|
+
lambda { |value|
|
27
|
+
self.options.parse_only = true
|
28
|
+
}
|
29
|
+
],
|
20
30
|
['--version', '-V', "Display the program version.",
|
21
31
|
lambda { |value|
|
22
32
|
puts "#{NTAIL_NAME}, version #{NTAIL_VERSION}"
|
23
33
|
self.options.running = false
|
24
34
|
}
|
25
35
|
],
|
36
|
+
['--line-number', '-l LINE_NUMBER', "Only process the line with the given line number",
|
37
|
+
lambda { |value|
|
38
|
+
self.options.line_number = value.to_i
|
39
|
+
}
|
40
|
+
],
|
26
41
|
['--filter', '-f CODE', "Ruby code block for filtering (parsed) lines - needs to return true or false.",
|
27
42
|
lambda { |value|
|
28
43
|
self.options.filter = eval "Proc.new #{value}"
|
@@ -39,6 +54,7 @@ module NginxTail
|
|
39
54
|
def self.parse_options
|
40
55
|
|
41
56
|
# application defaults...
|
57
|
+
self.options.interrupted = false
|
42
58
|
self.options.running = true
|
43
59
|
self.options.exit = 0
|
44
60
|
|
@@ -62,47 +78,63 @@ module NginxTail
|
|
62
78
|
|
63
79
|
['TERM', 'INT'].each do |signal|
|
64
80
|
Signal.trap(signal) do
|
65
|
-
self.options.running = false ;
|
81
|
+
self.options.running = false ; self.options.interrupted = true
|
82
|
+
$stdin.close if ARGF.file == $stdin # ie. reading from STDIN
|
66
83
|
end
|
67
84
|
end
|
68
85
|
|
69
|
-
lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
|
86
|
+
files_read = lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
|
70
87
|
|
71
88
|
while self.options.running and ARGF.gets
|
89
|
+
if ARGF.file.lineno == 1
|
90
|
+
files_read += 1
|
91
|
+
if self.options.verbose
|
92
|
+
$stderr.puts "[INFO] now processing file #{ARGF.filename}"
|
93
|
+
end
|
94
|
+
end
|
72
95
|
raw_line = $_.chomp ; lines_read += 1
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
96
|
+
unless self.options.dry_run
|
97
|
+
if !self.options.line_number or self.options.line_number == ARGF.lineno
|
98
|
+
begin
|
99
|
+
log_line = NginxTail::LogLine.new(raw_line)
|
100
|
+
if log_line.parsable
|
101
|
+
parsable_lines += 1
|
102
|
+
unless self.options.parse_only
|
103
|
+
if !self.options.filter || self.options.filter.call(log_line)
|
104
|
+
lines_processed += 1
|
105
|
+
if self.options.code
|
106
|
+
self.options.code.call(log_line)
|
107
|
+
else
|
108
|
+
puts log_line
|
109
|
+
end
|
110
|
+
else
|
111
|
+
lines_ignored += 1
|
112
|
+
if self.options.verbose
|
113
|
+
$stderr.puts "[WARNING] ignoring line ##{lines_read}"
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
81
117
|
else
|
82
|
-
|
118
|
+
unparsable_lines += 1
|
119
|
+
if self.options.verbose
|
120
|
+
$stderr.puts "[ERROR] cannot parse '#{raw_line}'"
|
121
|
+
end
|
83
122
|
end
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
unparsable_lines += 1
|
92
|
-
if self.options.verbose
|
93
|
-
$stderr.puts "[ERROR] cannot parse '#{raw_line}'"
|
123
|
+
rescue
|
124
|
+
$stderr.puts "[ERROR] processing line #{ARGF.file.lineno} of file #{ARGF.filename} resulted in #{$!.message}"
|
125
|
+
$stderr.puts "[ERROR] " + raw_line
|
126
|
+
self.options.exit = -1
|
127
|
+
self.options.running = false
|
128
|
+
raise $! # TODO if the "re-raise exceptions" option has been set...
|
94
129
|
end
|
95
130
|
end
|
96
|
-
rescue
|
97
|
-
$stderr.puts "[ERROR] processing line #{lines_read} resulted in #{$!.message}"
|
98
|
-
$stderr.puts "[ERROR] " + raw_line
|
99
|
-
self.options.exit = -1
|
100
|
-
self.options.running = false
|
101
131
|
end
|
102
132
|
end
|
103
133
|
|
104
134
|
if self.options.verbose
|
105
|
-
$stderr.puts
|
135
|
+
$stderr.puts if self.options.interrupted
|
136
|
+
$stderr.print "[INFO] read #{lines_read} lines in #{files_read} files"
|
137
|
+
$stderr.print " (interrupted)" if self.options.interrupted ; $stderr.puts
|
106
138
|
$stderr.puts "[INFO] #{parsable_lines} parsable lines, #{unparsable_lines} unparsable lines"
|
107
139
|
$stderr.puts "[INFO] processed #{lines_processed} lines, ignored #{lines_ignored} lines"
|
108
140
|
end
|
data/lib/ntail/body_bytes_sent.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module NginxTail
|
2
|
+
module BodyBytesSent
|
3
|
+
|
4
|
+
def self.included(base) # :nodoc:
|
5
|
+
base.class_eval do
|
6
|
+
|
7
|
+
# this ensures the below module methods actually make sense...
|
8
|
+
raise "Class #{base.name} should implement instance method 'body_bytes_sent'" unless base.instance_methods.include? 'body_bytes_sent'
|
9
|
+
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
end
|
data/lib/ntail/http_method.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
module NginxTail
|
2
|
+
module HttpMethod
|
3
|
+
|
4
|
+
#
|
5
|
+
# http://www.ietf.org/rfc/rfc2616.txt - "section 5.1.1 Method"
|
6
|
+
#
|
7
|
+
# OPTIONS GET HEAD POST PUT DELETE TRACE CONNECT
|
8
|
+
#
|
9
|
+
|
10
|
+
def self.included(base) # :nodoc:
|
11
|
+
base.class_eval do
|
12
|
+
|
13
|
+
def self.to_http_method_s(http_method)
|
14
|
+
(http_method ||= "").upcase! # will be nil if $request == "-" (ie. "dodgy" HTTP requests)
|
15
|
+
case http_method
|
16
|
+
when "POST", "PUT", "DELETE"
|
17
|
+
http_method.inverse # if Sickill::Rainbow.enabled...
|
18
|
+
else
|
19
|
+
http_method
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# this ensures the below module methods actually make sense...
|
24
|
+
raise "Class #{base.name} should implement instance method 'http_method'" unless base.instance_methods.include? 'http_method'
|
25
|
+
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_http_method_s
|
30
|
+
self.class.to_http_method_s(self.http_method)
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
data/lib/ntail/http_referer.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module NginxTail
|
4
|
+
module HttpReferer
|
5
|
+
|
6
|
+
#
|
7
|
+
# to easily identify external referers, for filtering and formatting purposes
|
8
|
+
#
|
9
|
+
# e.g. Regexp.compile('^http(s)?://(www\.)?MY_WEBSITE_NAME\.com')
|
10
|
+
#
|
11
|
+
|
12
|
+
UNKNOWN_REFERER = "-".freeze # the 'default' nginx value for $http_referer variable
|
13
|
+
|
14
|
+
def self.included(base) # :nodoc:
|
15
|
+
base.class_eval do
|
16
|
+
|
17
|
+
@@internal_referers = []
|
18
|
+
|
19
|
+
# mainly (solely?) for testing purposes...
|
20
|
+
def self.reset_internal_referers()
|
21
|
+
while !@@internal_referers.empty? ; @@internal_referers.pop ; end
|
22
|
+
end
|
23
|
+
|
24
|
+
# mainly (solely?) for testing purposes...
|
25
|
+
def self.internal_referers()
|
26
|
+
@@internal_referers.dup
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.add_internal_referer(internal_referer)
|
30
|
+
raise "Cannot add unkown HTTP referer" if self.unknown_referer? internal_referer
|
31
|
+
(@@internal_referers << internal_referer).uniq!
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.unknown_referer?(http_referer)
|
35
|
+
http_referer == UNKNOWN_REFERER
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.internal_referer?(http_referer)
|
39
|
+
!self.unknown_referer?(http_referer) && !@@internal_referers.detect { |referer| referer.match(http_referer) }.nil?
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.external_referer?(http_referer)
|
43
|
+
!self.unknown_referer?(http_referer) && !self.internal_referer?(http_referer)
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.to_referer_s(http_referer)
|
47
|
+
if self.unknown_referer? http_referer
|
48
|
+
http_referer
|
49
|
+
else begin
|
50
|
+
# try to parse it as a URI, but with default value if un-parsable
|
51
|
+
URI.parse(http_referer).host || http_referer
|
52
|
+
rescue URI::InvalidURIError
|
53
|
+
http_referer
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
# this ensures the below module methods actually make sense...
|
59
|
+
raise "Class #{base.name} should implement instance method 'http_referer'" unless base.instance_methods.include? 'http_referer'
|
60
|
+
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def unknown_referer?
|
65
|
+
self.class.unknown_referer?(self.http_referer)
|
66
|
+
end
|
67
|
+
|
68
|
+
def internal_referer?
|
69
|
+
self.class.internal_referer?(self.http_referer)
|
70
|
+
end
|
71
|
+
|
72
|
+
def external_referer?
|
73
|
+
self.class.external_referer?(self.http_referer)
|
74
|
+
end
|
75
|
+
|
76
|
+
def to_referer_s
|
77
|
+
self.class.to_referer_s(self.http_referer)
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
data/lib/ntail/http_user_agent.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'user-agent'
|
3
|
+
|
4
|
+
class Agent
|
5
|
+
|
6
|
+
def search_bot?
|
7
|
+
false
|
8
|
+
end
|
9
|
+
|
10
|
+
end
|
11
|
+
|
12
|
+
class SearchBot < Agent
|
13
|
+
|
14
|
+
def search_bot?
|
15
|
+
true
|
16
|
+
end
|
17
|
+
|
18
|
+
#
|
19
|
+
# Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
|
20
|
+
# Googlebot-Image/1.0
|
21
|
+
# msnbot/2.0b (+http://search.msn.com/msnbot.htm)
|
22
|
+
# msnbot/2.0b (+http://search.msn.com/msnbot.htm).
|
23
|
+
# msnbot/2.0b (+http://search.msn.com/msnbot.htm)._
|
24
|
+
# Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
|
25
|
+
# Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)
|
26
|
+
# ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)
|
27
|
+
# Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
|
28
|
+
#
|
29
|
+
|
30
|
+
KNOWN_SEARCH_BOTS = [
|
31
|
+
GOOGLE_BOT = Regexp.compile('Googlebot.*\/'),
|
32
|
+
MSN_BOT = Regexp.compile('msnbot\/'),
|
33
|
+
YAHOO_BOT = Regexp.compile('Yahoo! Slurp\/?'),
|
34
|
+
PINGDOM_BOT = Regexp.compile('Pingdom.com_bot_version_'),
|
35
|
+
ALEXA_BOT = Regexp.compile('ia_archiver'),
|
36
|
+
YANDEX_BOT = Regexp.compile('YandexBot\/'),
|
37
|
+
]
|
38
|
+
|
39
|
+
def self.search_bot?(http_user_agent)
|
40
|
+
!KNOWN_SEARCH_BOTS.detect { |bot| bot.match(http_user_agent) }.nil?
|
41
|
+
end
|
42
|
+
|
43
|
+
attr_accessor :name
|
44
|
+
attr_accessor :os
|
45
|
+
|
46
|
+
def initialize(string)
|
47
|
+
super string
|
48
|
+
@name = self.class.name_for_user_agent(string)
|
49
|
+
@os = self.class.os_for_user_agent(string)
|
50
|
+
end
|
51
|
+
|
52
|
+
def self.name_for_user_agent string
|
53
|
+
case string
|
54
|
+
when GOOGLE_BOT then :googlebot
|
55
|
+
when MSN_BOT then :msnbot
|
56
|
+
when YAHOO_BOT then :yahoo_slurp
|
57
|
+
when ALEXA_BOT then :ia_archiver
|
58
|
+
when PINGDOM_BOT then :pingdom_bot
|
59
|
+
when YANDEX_BOT then :yandex_bot
|
60
|
+
else super(string)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def self.os_for_user_agent string
|
65
|
+
case string
|
66
|
+
when GOOGLE_BOT then :"google.com"
|
67
|
+
when MSN_BOT then :"msn.com"
|
68
|
+
when YAHOO_BOT then :"yahoo.com"
|
69
|
+
when ALEXA_BOT then :"alexa.com"
|
70
|
+
when PINGDOM_BOT then :"pingdom.com"
|
71
|
+
when YANDEX_BOT then :"yandex.com"
|
72
|
+
else super(string)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
module NginxTail
|
79
|
+
module HttpUserAgent
|
80
|
+
|
81
|
+
def self.included(base) # :nodoc:
|
82
|
+
base.class_eval do
|
83
|
+
|
84
|
+
def self.search_bot?(http_user_agent)
|
85
|
+
SearchBot.search_bot?(http_user_agent)
|
86
|
+
end
|
87
|
+
|
88
|
+
def self.to_agent(http_user_agent)
|
89
|
+
if self.search_bot? http_user_agent
|
90
|
+
SearchBot.new(http_user_agent)
|
91
|
+
else
|
92
|
+
Agent.new(http_user_agent)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def self.to_agent_s(http_user_agent)
|
97
|
+
agent = self.to_agent http_user_agent
|
98
|
+
"(%s, %s)" % [agent.name, agent.os]
|
99
|
+
end
|
100
|
+
|
101
|
+
# this ensures the below module methods actually make sense...
|
102
|
+
raise "Class #{base.name} should implement instance method 'http_user_agent'" unless base.instance_methods.include? 'http_user_agent'
|
103
|
+
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def search_bot?
|
108
|
+
self.class.search_bot?(self.http_user_agent)
|
109
|
+
end
|
110
|
+
|
111
|
+
def to_agent
|
112
|
+
self.class.to_agent(self.http_user_agent)
|
113
|
+
end
|
114
|
+
|
115
|
+
def to_agent_s
|
116
|
+
self.class.to_agent_s(self.http_user_agent)
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
|