ntail 0.0.6 → 0.0.7

Sign up for free protection for your applications and access to all features.
data/Gemfile CHANGED
@@ -12,4 +12,5 @@ group :development do
12
12
  gem "bundler", "~> 1.0.0"
13
13
  gem "jeweler", "~> 1.5.1"
14
14
  gem "rcov", ">= 0"
15
+ gem "geoip", ">= 0"
15
16
  end
data/Gemfile.lock CHANGED
@@ -1,6 +1,7 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
+ geoip (0.8.7)
4
5
  git (1.2.5)
5
6
  jeweler (1.5.1)
6
7
  bundler (~> 1.0.0)
@@ -17,6 +18,7 @@ PLATFORMS
17
18
 
18
19
  DEPENDENCIES
19
20
  bundler (~> 1.0.0)
21
+ geoip
20
22
  jeweler (~> 1.5.1)
21
23
  rainbow
22
24
  rcov
@@ -49,7 +49,7 @@ A tail(1)-like utility for nginx log files that supports parsing, filtering and
49
49
 
50
50
  * print out GeoIP country and city information for each HTTP request <em>(depends on the optional <tt>geoip</tt> gem)</em>
51
51
 
52
- > ntail -e '{ |line| puts [line.to_country, line.to_city].join("\t") }' /var/log/nginx/access.log
52
+ > ntail -e '{ |line| puts [line.to_country_s, line.to_city_s].join("\t") }' /var/log/nginx/access.log
53
53
  United States Los Angeles
54
54
  United States Houston
55
55
  Germany Berlin
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.7
@@ -1,5 +1,28 @@
1
1
  NTAIL_NAME = 'ntail'
2
2
  NTAIL_VERSION = '0.0.1'
3
3
 
4
+ # so-called components...
5
+ require 'ntail/remote_addr'
6
+ require 'ntail/remote_user'
7
+ require 'ntail/time_local'
8
+ require 'ntail/request'
9
+ require 'ntail/status'
10
+ require 'ntail/body_bytes_sent'
11
+ require 'ntail/http_referer'
12
+ require 'ntail/http_user_agent'
13
+ require 'ntail/proxy_addresses'
14
+
15
+ # so-called sub-components...
16
+ require 'ntail/http_method'
17
+ require 'ntail/uri'
18
+ require 'ntail/http_version'
19
+
20
+ # additional utility functions...
21
+ require 'ntail/known_ip_addresses'
22
+ require 'ntail/local_ip_addresses'
23
+
24
+ # the core classes...
4
25
  require 'ntail/log_line'
5
- require 'ntail/application'
26
+ require 'ntail/application'
27
+
28
+ # That's all, Folks!
@@ -12,17 +12,32 @@ module NginxTail
12
12
  def self.ntail_options
13
13
  # shamelessly copied from lib/rake.rb (rake gem)
14
14
  [
15
- ['--verbose', '--v', "Run verbosely (log messages to STDERR).",
15
+ ['--verbose', '-v', "Run verbosely (log messages to STDERR).",
16
16
  lambda { |value|
17
17
  self.options.verbose = true
18
18
  }
19
19
  ],
20
+ ['--dry-run', '-n', "Dry-run: process files, but don't actually parse the lines",
21
+ lambda { |value|
22
+ self.options.dry_run = true
23
+ }
24
+ ],
25
+ ['--parse-only', '-p', "Parse only: parse all lines, but don't actually process them",
26
+ lambda { |value|
27
+ self.options.parse_only = true
28
+ }
29
+ ],
20
30
  ['--version', '-V', "Display the program version.",
21
31
  lambda { |value|
22
32
  puts "#{NTAIL_NAME}, version #{NTAIL_VERSION}"
23
33
  self.options.running = false
24
34
  }
25
35
  ],
36
+ ['--line-number', '-l LINE_NUMBER', "Only process the line with the given line number",
37
+ lambda { |value|
38
+ self.options.line_number = value.to_i
39
+ }
40
+ ],
26
41
  ['--filter', '-f CODE', "Ruby code block for filtering (parsed) lines - needs to return true or false.",
27
42
  lambda { |value|
28
43
  self.options.filter = eval "Proc.new #{value}"
@@ -39,6 +54,7 @@ module NginxTail
39
54
  def self.parse_options
40
55
 
41
56
  # application defaults...
57
+ self.options.interrupted = false
42
58
  self.options.running = true
43
59
  self.options.exit = 0
44
60
 
@@ -62,47 +78,63 @@ module NginxTail
62
78
 
63
79
  ['TERM', 'INT'].each do |signal|
64
80
  Signal.trap(signal) do
65
- self.options.running = false ; puts
81
+ self.options.running = false ; self.options.interrupted = true
82
+ $stdin.close if ARGF.file == $stdin # ie. reading from STDIN
66
83
  end
67
84
  end
68
85
 
69
- lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
86
+ files_read = lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
70
87
 
71
88
  while self.options.running and ARGF.gets
89
+ if ARGF.file.lineno == 1
90
+ files_read += 1
91
+ if self.options.verbose
92
+ $stderr.puts "[INFO] now processing file #{ARGF.filename}"
93
+ end
94
+ end
72
95
  raw_line = $_.chomp ; lines_read += 1
73
- begin
74
- log_line = NginxTail::LogLine.new(raw_line)
75
- if log_line.parsable
76
- parsable_lines += 1
77
- if !self.options.filter || self.options.filter.call(log_line)
78
- lines_processed += 1
79
- if self.options.code
80
- self.options.code.call(log_line)
96
+ unless self.options.dry_run
97
+ if !self.options.line_number or self.options.line_number == ARGF.lineno
98
+ begin
99
+ log_line = NginxTail::LogLine.new(raw_line)
100
+ if log_line.parsable
101
+ parsable_lines += 1
102
+ unless self.options.parse_only
103
+ if !self.options.filter || self.options.filter.call(log_line)
104
+ lines_processed += 1
105
+ if self.options.code
106
+ self.options.code.call(log_line)
107
+ else
108
+ puts log_line
109
+ end
110
+ else
111
+ lines_ignored += 1
112
+ if self.options.verbose
113
+ $stderr.puts "[WARNING] ignoring line ##{lines_read}"
114
+ end
115
+ end
116
+ end
81
117
  else
82
- puts log_line
118
+ unparsable_lines += 1
119
+ if self.options.verbose
120
+ $stderr.puts "[ERROR] cannot parse '#{raw_line}'"
121
+ end
83
122
  end
84
- else
85
- lines_ignored += 1
86
- if self.options.verbose
87
- $stderr.puts "[WARNING] ignoring line ##{lines_read}"
88
- end
89
- end
90
- else
91
- unparsable_lines += 1
92
- if self.options.verbose
93
- $stderr.puts "[ERROR] cannot parse '#{raw_line}'"
123
+ rescue
124
+ $stderr.puts "[ERROR] processing line #{ARGF.file.lineno} of file #{ARGF.filename} resulted in #{$!.message}"
125
+ $stderr.puts "[ERROR] " + raw_line
126
+ self.options.exit = -1
127
+ self.options.running = false
128
+ raise $! # TODO if the "re-raise exceptions" option has been set...
94
129
  end
95
130
  end
96
- rescue
97
- $stderr.puts "[ERROR] processing line #{lines_read} resulted in #{$!.message}"
98
- $stderr.puts "[ERROR] " + raw_line
99
- self.options.exit = -1
100
- self.options.running = false
101
131
  end
102
132
  end
103
133
 
104
134
  if self.options.verbose
105
- $stderr.puts "[INFO] read #{lines_read} lines"
135
+ $stderr.puts if self.options.interrupted
136
+ $stderr.print "[INFO] read #{lines_read} lines in #{files_read} files"
137
+ $stderr.print " (interrupted)" if self.options.interrupted ; $stderr.puts
106
138
  $stderr.puts "[INFO] #{parsable_lines} parsable lines, #{unparsable_lines} unparsable lines"
107
139
  $stderr.puts "[INFO] processed #{lines_processed} lines, ignored #{lines_ignored} lines"
108
140
  end
@@ -0,0 +1,14 @@
1
+ module NginxTail
2
+ module BodyBytesSent
3
+
4
+ def self.included(base) # :nodoc:
5
+ base.class_eval do
6
+
7
+ # this ensures the below module methods actually make sense...
8
+ raise "Class #{base.name} should implement instance method 'body_bytes_sent'" unless base.instance_methods.include? 'body_bytes_sent'
9
+
10
+ end
11
+ end
12
+
13
+ end
14
+ end
@@ -0,0 +1,34 @@
1
+ module NginxTail
2
+ module HttpMethod
3
+
4
+ #
5
+ # http://www.ietf.org/rfc/rfc2616.txt - "section 5.1.1 Method"
6
+ #
7
+ # OPTIONS GET HEAD POST PUT DELETE TRACE CONNECT
8
+ #
9
+
10
+ def self.included(base) # :nodoc:
11
+ base.class_eval do
12
+
13
+ def self.to_http_method_s(http_method)
14
+ (http_method ||= "").upcase! # will be nil if $request == "-" (ie. "dodgy" HTTP requests)
15
+ case http_method
16
+ when "POST", "PUT", "DELETE"
17
+ http_method.inverse # if Sickill::Rainbow.enabled...
18
+ else
19
+ http_method
20
+ end
21
+ end
22
+
23
+ # this ensures the below module methods actually make sense...
24
+ raise "Class #{base.name} should implement instance method 'http_method'" unless base.instance_methods.include? 'http_method'
25
+
26
+ end
27
+ end
28
+
29
+ def to_http_method_s
30
+ self.class.to_http_method_s(self.http_method)
31
+ end
32
+
33
+ end
34
+ end
@@ -0,0 +1,81 @@
1
+ require 'uri'
2
+
3
+ module NginxTail
4
+ module HttpReferer
5
+
6
+ #
7
+ # to easily identify external referers, for filtering and formatting purposes
8
+ #
9
+ # e.g. Regexp.compile('^http(s)?://(www\.)?MY_WEBSITE_NAME\.com')
10
+ #
11
+
12
+ UNKNOWN_REFERER = "-".freeze # the 'default' nginx value for $http_referer variable
13
+
14
+ def self.included(base) # :nodoc:
15
+ base.class_eval do
16
+
17
+ @@internal_referers = []
18
+
19
+ # mainly (solely?) for testing purposes...
20
+ def self.reset_internal_referers()
21
+ while !@@internal_referers.empty? ; @@internal_referers.pop ; end
22
+ end
23
+
24
+ # mainly (solely?) for testing purposes...
25
+ def self.internal_referers()
26
+ @@internal_referers.dup
27
+ end
28
+
29
+ def self.add_internal_referer(internal_referer)
30
+ raise "Cannot add unkown HTTP referer" if self.unknown_referer? internal_referer
31
+ (@@internal_referers << internal_referer).uniq!
32
+ end
33
+
34
+ def self.unknown_referer?(http_referer)
35
+ http_referer == UNKNOWN_REFERER
36
+ end
37
+
38
+ def self.internal_referer?(http_referer)
39
+ !self.unknown_referer?(http_referer) && !@@internal_referers.detect { |referer| referer.match(http_referer) }.nil?
40
+ end
41
+
42
+ def self.external_referer?(http_referer)
43
+ !self.unknown_referer?(http_referer) && !self.internal_referer?(http_referer)
44
+ end
45
+
46
+ def self.to_referer_s(http_referer)
47
+ if self.unknown_referer? http_referer
48
+ http_referer
49
+ else begin
50
+ # try to parse it as a URI, but with default value if un-parsable
51
+ URI.parse(http_referer).host || http_referer
52
+ rescue URI::InvalidURIError
53
+ http_referer
54
+ end
55
+ end
56
+ end
57
+
58
+ # this ensures the below module methods actually make sense...
59
+ raise "Class #{base.name} should implement instance method 'http_referer'" unless base.instance_methods.include? 'http_referer'
60
+
61
+ end
62
+ end
63
+
64
+ def unknown_referer?
65
+ self.class.unknown_referer?(self.http_referer)
66
+ end
67
+
68
+ def internal_referer?
69
+ self.class.internal_referer?(self.http_referer)
70
+ end
71
+
72
+ def external_referer?
73
+ self.class.external_referer?(self.http_referer)
74
+ end
75
+
76
+ def to_referer_s
77
+ self.class.to_referer_s(self.http_referer)
78
+ end
79
+
80
+ end
81
+ end
@@ -0,0 +1,122 @@
1
+ require 'rubygems'
2
+ require 'user-agent'
3
+
4
+ class Agent
5
+
6
+ def search_bot?
7
+ false
8
+ end
9
+
10
+ end
11
+
12
+ class SearchBot < Agent
13
+
14
+ def search_bot?
15
+ true
16
+ end
17
+
18
+ #
19
+ # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
20
+ # Googlebot-Image/1.0
21
+ # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
22
+ # msnbot/2.0b (+http://search.msn.com/msnbot.htm).
23
+ # msnbot/2.0b (+http://search.msn.com/msnbot.htm)._
24
+ # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
25
+ # Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)
26
+ # ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)
27
+ # Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
28
+ #
29
+
30
+ KNOWN_SEARCH_BOTS = [
31
+ GOOGLE_BOT = Regexp.compile('Googlebot.*\/'),
32
+ MSN_BOT = Regexp.compile('msnbot\/'),
33
+ YAHOO_BOT = Regexp.compile('Yahoo! Slurp\/?'),
34
+ PINGDOM_BOT = Regexp.compile('Pingdom.com_bot_version_'),
35
+ ALEXA_BOT = Regexp.compile('ia_archiver'),
36
+ YANDEX_BOT = Regexp.compile('YandexBot\/'),
37
+ ]
38
+
39
+ def self.search_bot?(http_user_agent)
40
+ !KNOWN_SEARCH_BOTS.detect { |bot| bot.match(http_user_agent) }.nil?
41
+ end
42
+
43
+ attr_accessor :name
44
+ attr_accessor :os
45
+
46
+ def initialize(string)
47
+ super string
48
+ @name = self.class.name_for_user_agent(string)
49
+ @os = self.class.os_for_user_agent(string)
50
+ end
51
+
52
+ def self.name_for_user_agent string
53
+ case string
54
+ when GOOGLE_BOT then :googlebot
55
+ when MSN_BOT then :msnbot
56
+ when YAHOO_BOT then :yahoo_slurp
57
+ when ALEXA_BOT then :ia_archiver
58
+ when PINGDOM_BOT then :pingdom_bot
59
+ when YANDEX_BOT then :yandex_bot
60
+ else super(string)
61
+ end
62
+ end
63
+
64
+ def self.os_for_user_agent string
65
+ case string
66
+ when GOOGLE_BOT then :"google.com"
67
+ when MSN_BOT then :"msn.com"
68
+ when YAHOO_BOT then :"yahoo.com"
69
+ when ALEXA_BOT then :"alexa.com"
70
+ when PINGDOM_BOT then :"pingdom.com"
71
+ when YANDEX_BOT then :"yandex.com"
72
+ else super(string)
73
+ end
74
+ end
75
+
76
+ end
77
+
78
+ module NginxTail
79
+ module HttpUserAgent
80
+
81
+ def self.included(base) # :nodoc:
82
+ base.class_eval do
83
+
84
+ def self.search_bot?(http_user_agent)
85
+ SearchBot.search_bot?(http_user_agent)
86
+ end
87
+
88
+ def self.to_agent(http_user_agent)
89
+ if self.search_bot? http_user_agent
90
+ SearchBot.new(http_user_agent)
91
+ else
92
+ Agent.new(http_user_agent)
93
+ end
94
+ end
95
+
96
+ def self.to_agent_s(http_user_agent)
97
+ agent = self.to_agent http_user_agent
98
+ "(%s, %s)" % [agent.name, agent.os]
99
+ end
100
+
101
+ # this ensures the below module methods actually make sense...
102
+ raise "Class #{base.name} should implement instance method 'http_user_agent'" unless base.instance_methods.include? 'http_user_agent'
103
+
104
+ end
105
+ end
106
+
107
+ def search_bot?
108
+ self.class.search_bot?(self.http_user_agent)
109
+ end
110
+
111
+ def to_agent
112
+ self.class.to_agent(self.http_user_agent)
113
+ end
114
+
115
+ def to_agent_s
116
+ self.class.to_agent_s(self.http_user_agent)
117
+ end
118
+
119
+ end
120
+ end
121
+
122
+