ntail 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -12,4 +12,5 @@ group :development do
12
12
  gem "bundler", "~> 1.0.0"
13
13
  gem "jeweler", "~> 1.5.1"
14
14
  gem "rcov", ">= 0"
15
+ gem "geoip", ">= 0"
15
16
  end
@@ -1,6 +1,7 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
+ geoip (0.8.7)
4
5
  git (1.2.5)
5
6
  jeweler (1.5.1)
6
7
  bundler (~> 1.0.0)
@@ -17,6 +18,7 @@ PLATFORMS
17
18
 
18
19
  DEPENDENCIES
19
20
  bundler (~> 1.0.0)
21
+ geoip
20
22
  jeweler (~> 1.5.1)
21
23
  rainbow
22
24
  rcov
@@ -49,7 +49,7 @@ A tail(1)-like utility for nginx log files that supports parsing, filtering and
49
49
 
50
50
  * print out GeoIP country and city information for each HTTP request <em>(depends on the optional <tt>geoip</tt> gem)</em>
51
51
 
52
- > ntail -e '{ |line| puts [line.to_country, line.to_city].join("\t") }' /var/log/nginx/access.log
52
+ > ntail -e '{ |line| puts [line.to_country_s, line.to_city_s].join("\t") }' /var/log/nginx/access.log
53
53
  United States Los Angeles
54
54
  United States Houston
55
55
  Germany Berlin
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.7
@@ -1,5 +1,28 @@
1
1
  NTAIL_NAME = 'ntail'
2
2
  NTAIL_VERSION = '0.0.1'
3
3
 
4
+ # so-called components...
5
+ require 'ntail/remote_addr'
6
+ require 'ntail/remote_user'
7
+ require 'ntail/time_local'
8
+ require 'ntail/request'
9
+ require 'ntail/status'
10
+ require 'ntail/body_bytes_sent'
11
+ require 'ntail/http_referer'
12
+ require 'ntail/http_user_agent'
13
+ require 'ntail/proxy_addresses'
14
+
15
+ # so-called sub-components...
16
+ require 'ntail/http_method'
17
+ require 'ntail/uri'
18
+ require 'ntail/http_version'
19
+
20
+ # additional utility functions...
21
+ require 'ntail/known_ip_addresses'
22
+ require 'ntail/local_ip_addresses'
23
+
24
+ # the core classes...
4
25
  require 'ntail/log_line'
5
- require 'ntail/application'
26
+ require 'ntail/application'
27
+
28
+ # That's all, Folks!
@@ -12,17 +12,32 @@ module NginxTail
12
12
  def self.ntail_options
13
13
  # shamelessly copied from lib/rake.rb (rake gem)
14
14
  [
15
- ['--verbose', '--v', "Run verbosely (log messages to STDERR).",
15
+ ['--verbose', '-v', "Run verbosely (log messages to STDERR).",
16
16
  lambda { |value|
17
17
  self.options.verbose = true
18
18
  }
19
19
  ],
20
+ ['--dry-run', '-n', "Dry-run: process files, but don't actually parse the lines",
21
+ lambda { |value|
22
+ self.options.dry_run = true
23
+ }
24
+ ],
25
+ ['--parse-only', '-p', "Parse only: parse all lines, but don't actually process them",
26
+ lambda { |value|
27
+ self.options.parse_only = true
28
+ }
29
+ ],
20
30
  ['--version', '-V', "Display the program version.",
21
31
  lambda { |value|
22
32
  puts "#{NTAIL_NAME}, version #{NTAIL_VERSION}"
23
33
  self.options.running = false
24
34
  }
25
35
  ],
36
+ ['--line-number', '-l LINE_NUMBER', "Only process the line with the given line number",
37
+ lambda { |value|
38
+ self.options.line_number = value.to_i
39
+ }
40
+ ],
26
41
  ['--filter', '-f CODE', "Ruby code block for filtering (parsed) lines - needs to return true or false.",
27
42
  lambda { |value|
28
43
  self.options.filter = eval "Proc.new #{value}"
@@ -39,6 +54,7 @@ module NginxTail
39
54
  def self.parse_options
40
55
 
41
56
  # application defaults...
57
+ self.options.interrupted = false
42
58
  self.options.running = true
43
59
  self.options.exit = 0
44
60
 
@@ -62,47 +78,63 @@ module NginxTail
62
78
 
63
79
  ['TERM', 'INT'].each do |signal|
64
80
  Signal.trap(signal) do
65
- self.options.running = false ; puts
81
+ self.options.running = false ; self.options.interrupted = true
82
+ $stdin.close if ARGF.file == $stdin # ie. reading from STDIN
66
83
  end
67
84
  end
68
85
 
69
- lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
86
+ files_read = lines_read = lines_processed = lines_ignored = parsable_lines = unparsable_lines = 0
70
87
 
71
88
  while self.options.running and ARGF.gets
89
+ if ARGF.file.lineno == 1
90
+ files_read += 1
91
+ if self.options.verbose
92
+ $stderr.puts "[INFO] now processing file #{ARGF.filename}"
93
+ end
94
+ end
72
95
  raw_line = $_.chomp ; lines_read += 1
73
- begin
74
- log_line = NginxTail::LogLine.new(raw_line)
75
- if log_line.parsable
76
- parsable_lines += 1
77
- if !self.options.filter || self.options.filter.call(log_line)
78
- lines_processed += 1
79
- if self.options.code
80
- self.options.code.call(log_line)
96
+ unless self.options.dry_run
97
+ if !self.options.line_number or self.options.line_number == ARGF.lineno
98
+ begin
99
+ log_line = NginxTail::LogLine.new(raw_line)
100
+ if log_line.parsable
101
+ parsable_lines += 1
102
+ unless self.options.parse_only
103
+ if !self.options.filter || self.options.filter.call(log_line)
104
+ lines_processed += 1
105
+ if self.options.code
106
+ self.options.code.call(log_line)
107
+ else
108
+ puts log_line
109
+ end
110
+ else
111
+ lines_ignored += 1
112
+ if self.options.verbose
113
+ $stderr.puts "[WARNING] ignoring line ##{lines_read}"
114
+ end
115
+ end
116
+ end
81
117
  else
82
- puts log_line
118
+ unparsable_lines += 1
119
+ if self.options.verbose
120
+ $stderr.puts "[ERROR] cannot parse '#{raw_line}'"
121
+ end
83
122
  end
84
- else
85
- lines_ignored += 1
86
- if self.options.verbose
87
- $stderr.puts "[WARNING] ignoring line ##{lines_read}"
88
- end
89
- end
90
- else
91
- unparsable_lines += 1
92
- if self.options.verbose
93
- $stderr.puts "[ERROR] cannot parse '#{raw_line}'"
123
+ rescue
124
+ $stderr.puts "[ERROR] processing line #{ARGF.file.lineno} of file #{ARGF.filename} resulted in #{$!.message}"
125
+ $stderr.puts "[ERROR] " + raw_line
126
+ self.options.exit = -1
127
+ self.options.running = false
128
+ raise $! # TODO if the "re-raise exceptions" option has been set...
94
129
  end
95
130
  end
96
- rescue
97
- $stderr.puts "[ERROR] processing line #{lines_read} resulted in #{$!.message}"
98
- $stderr.puts "[ERROR] " + raw_line
99
- self.options.exit = -1
100
- self.options.running = false
101
131
  end
102
132
  end
103
133
 
104
134
  if self.options.verbose
105
- $stderr.puts "[INFO] read #{lines_read} lines"
135
+ $stderr.puts if self.options.interrupted
136
+ $stderr.print "[INFO] read #{lines_read} lines in #{files_read} files"
137
+ $stderr.print " (interrupted)" if self.options.interrupted ; $stderr.puts
106
138
  $stderr.puts "[INFO] #{parsable_lines} parsable lines, #{unparsable_lines} unparsable lines"
107
139
  $stderr.puts "[INFO] processed #{lines_processed} lines, ignored #{lines_ignored} lines"
108
140
  end
@@ -0,0 +1,14 @@
1
module NginxTail
  # Mixin for the +body_bytes_sent+ component of a log line.
  #
  # The module adds no methods of its own; including it only verifies that the
  # host class already defines a +body_bytes_sent+ instance method.
  module BodyBytesSent

    # Hook invoked by Ruby when this module is included into +base+.
    #
    # Raises RuntimeError unless +base+ already defines an instance method
    # named +body_bytes_sent+, so the module has to be included *after* that
    # method has been defined.
    def self.included(base) # :nodoc:
      # this ensures the below module methods actually make sense...
      #
      # Module#method_defined? takes a symbol on every Ruby version, whereas
      # instance_methods yields strings on 1.8 but symbols on 1.9+; comparing
      # that list against a string therefore always failed on newer rubies.
      unless base.method_defined?(:body_bytes_sent)
        raise "Class #{base.name} should implement instance method 'body_bytes_sent'"
      end
    end

  end
end
@@ -0,0 +1,34 @@
1
module NginxTail
  # Mixin that adds HTTP-method formatting to a log-line class.
  #
  # Including it defines the class method +to_http_method_s+ on the host class
  # and the matching instance method, which feeds the host's own +http_method+
  # value into the class method.
  module HttpMethod

    #
    # http://www.ietf.org/rfc/rfc2616.txt - "section 5.1.1 Method"
    #
    # OPTIONS GET HEAD POST PUT DELETE TRACE CONNECT
    #

    # Hook invoked by Ruby when this module is included into +base+.
    #
    # Raises RuntimeError unless +base+ already defines an +http_method+
    # instance method.
    def self.included(base) # :nodoc:
      base.class_eval do

        # Returns the upper-cased HTTP method as a string.
        #
        # +http_method+ may be nil (it will be nil if $request == "-", ie.
        # "dodgy" HTTP requests), in which case an empty string is returned.
        # POST, PUT and DELETE are additionally passed through String#inverse
        # when that method is available.
        def self.to_http_method_s(http_method)
          # Work on an upper-cased copy: upcase! would modify the caller's
          # string in place and raises on frozen strings.
          normalized = (http_method || "").upcase
          case normalized
          when "POST", "PUT", "DELETE"
            # String#inverse is not a core method (presumably supplied by the
            # rainbow gem - TODO confirm); fall back to the plain string when
            # it has not been loaded instead of raising NoMethodError.
            normalized.respond_to?(:inverse) ? normalized.inverse : normalized
          else
            normalized
          end
        end

        # this ensures the below module methods actually make sense...
        #
        # method_defined? is used because instance_methods returns symbols on
        # Ruby 1.9+, so a comparison against the string 'http_method' would
        # never succeed there.
        unless base.method_defined?(:http_method)
          raise "Class #{base.name} should implement instance method 'http_method'"
        end

      end
    end

    # Formats this object's own +http_method+ value; see the class method of
    # the same name on the including class.
    def to_http_method_s
      self.class.to_http_method_s(self.http_method)
    end

  end
end
@@ -0,0 +1,81 @@
1
+ require 'uri'
2
+
3
module NginxTail
  # Mixin that adds HTTP-referer classification and formatting to a log-line
  # class. Including it defines a set of class methods on the host class plus
  # instance-level wrappers that feed the host's own +http_referer+ value into
  # those class methods.
  module HttpReferer

    #
    # to easily identify external referers, for filtering and formatting purposes
    #
    # e.g. Regexp.compile('^http(s)?://(www\.)?MY_WEBSITE_NAME\.com')
    #

    UNKNOWN_REFERER = "-".freeze # the 'default' nginx value for $http_referer variable

    # Registered "internal" referer patterns. The list lives on this module and
    # is therefore shared by every class that includes it. It is initialised
    # once here rather than inside the included hook, so that including the
    # module into a second class no longer wipes patterns registered earlier.
    @@internal_referers = []

    # Hook invoked by Ruby when this module is included into +base+.
    #
    # Raises RuntimeError unless +base+ already defines an +http_referer+
    # instance method.
    def self.included(base) # :nodoc:
      base.class_eval do

        # mainly (solely?) for testing purposes...
        # Empties the list of registered internal referers in place.
        def self.reset_internal_referers()
          @@internal_referers.clear
        end

        # mainly (solely?) for testing purposes...
        # Returns a copy of the registered internal referers, so callers
        # cannot modify the registry through the returned array.
        def self.internal_referers()
          @@internal_referers.dup
        end

        # Registers +internal_referer+ (anything responding to +match+, e.g. a
        # Regexp as in the example above) and drops duplicate entries.
        # Raises RuntimeError when given the unknown-referer marker "-".
        def self.add_internal_referer(internal_referer)
          raise "Cannot add unkown HTTP referer" if self.unknown_referer? internal_referer
          (@@internal_referers << internal_referer).uniq!
        end

        # True when +http_referer+ equals the unknown-referer marker "-".
        def self.unknown_referer?(http_referer)
          http_referer == UNKNOWN_REFERER
        end

        # True when +http_referer+ is known and matches at least one of the
        # registered internal referer patterns.
        def self.internal_referer?(http_referer)
          !self.unknown_referer?(http_referer) &&
            @@internal_referers.any? { |referer| referer.match(http_referer) }
        end

        # True when +http_referer+ is known but matches none of the registered
        # internal referer patterns.
        def self.external_referer?(http_referer)
          !self.unknown_referer?(http_referer) && !self.internal_referer?(http_referer)
        end

        # Returns a short display form of +http_referer+: the unknown marker
        # unchanged, otherwise the host part of the URI, falling back to the
        # full value when it has no host or cannot be parsed as a URI.
        def self.to_referer_s(http_referer)
          return http_referer if self.unknown_referer?(http_referer)
          # try to parse it as a URI, but with default value if un-parsable
          URI.parse(http_referer).host || http_referer
        rescue URI::InvalidURIError
          http_referer
        end

        # this ensures the below module methods actually make sense...
        #
        # method_defined? is used because instance_methods returns symbols on
        # Ruby 1.9+, so a comparison against the string 'http_referer' would
        # never succeed there.
        unless base.method_defined?(:http_referer)
          raise "Class #{base.name} should implement instance method 'http_referer'"
        end

      end
    end

    # Instance-level wrappers: each one passes this object's own
    # +http_referer+ to the class method of the same name.

    def unknown_referer?
      self.class.unknown_referer?(self.http_referer)
    end

    def internal_referer?
      self.class.internal_referer?(self.http_referer)
    end

    def external_referer?
      self.class.external_referer?(self.http_referer)
    end

    def to_referer_s
      self.class.to_referer_s(self.http_referer)
    end

  end
end
@@ -0,0 +1,122 @@
1
+ require 'rubygems'
2
+ require 'user-agent'
3
+
4
# Adds a search-bot predicate to Agent (presumably re-opening the class that
# the 'user-agent' library required above provides - TODO confirm).
class Agent

  # Default answer for ordinary agents; SearchBot overrides this to return true.
  def search_bot?; false; end

end
11
+
12
# An Agent whose user-agent string matches one of the known crawler patterns
# listed below. Name and OS are taken from the lookup methods of this class,
# which fall back to the superclass for strings that match no pattern.
class SearchBot < Agent

  # Always true for search bots (Agent#search_bot? answers false).
  def search_bot?
    true
  end

  #
  # Sample user-agent strings the patterns below are meant to recognise:
  #
  # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
  # Googlebot-Image/1.0
  # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
  # msnbot/2.0b (+http://search.msn.com/msnbot.htm).
  # msnbot/2.0b (+http://search.msn.com/msnbot.htm)._
  # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
  # Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)
  # ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)
  # Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
  #

  GOOGLE_BOT  = Regexp.compile('Googlebot.*\/')
  MSN_BOT     = Regexp.compile('msnbot\/')
  YAHOO_BOT   = Regexp.compile('Yahoo! Slurp\/?')
  PINGDOM_BOT = Regexp.compile('Pingdom.com_bot_version_')
  ALEXA_BOT   = Regexp.compile('ia_archiver')
  YANDEX_BOT  = Regexp.compile('YandexBot\/')

  # Every pattern above, in declaration order.
  KNOWN_SEARCH_BOTS = [
    GOOGLE_BOT,
    MSN_BOT,
    YAHOO_BOT,
    PINGDOM_BOT,
    ALEXA_BOT,
    YANDEX_BOT,
  ]

  # True when +http_user_agent+ matches at least one known crawler pattern.
  def self.search_bot?(http_user_agent)
    KNOWN_SEARCH_BOTS.any? { |pattern| pattern.match(http_user_agent) }
  end

  attr_accessor :name, :os

  # Builds the agent through the superclass constructor, then stores the
  # name and OS reported by this class's lookup methods.
  def initialize(string)
    super(string)
    @name = self.class.name_for_user_agent(string)
    @os = self.class.os_for_user_agent(string)
  end

  # Symbolic bot name for +string+; defers to the superclass when no
  # crawler pattern matches.
  def self.name_for_user_agent(string)
    case string
    when GOOGLE_BOT
      :googlebot
    when MSN_BOT
      :msnbot
    when YAHOO_BOT
      :yahoo_slurp
    when ALEXA_BOT
      :ia_archiver
    when PINGDOM_BOT
      :pingdom_bot
    when YANDEX_BOT
      :yandex_bot
    else
      super(string)
    end
  end

  # Reports the crawler's operating domain in the "os" slot for +string+;
  # defers to the superclass when no crawler pattern matches.
  def self.os_for_user_agent(string)
    case string
    when GOOGLE_BOT
      :"google.com"
    when MSN_BOT
      :"msn.com"
    when YAHOO_BOT
      :"yahoo.com"
    when ALEXA_BOT
      :"alexa.com"
    when PINGDOM_BOT
      :"pingdom.com"
    when YANDEX_BOT
      :"yandex.com"
    else
      super(string)
    end
  end

end
77
+
78
module NginxTail
  # Mixin that adds user-agent classification and formatting to a log-line
  # class. Including it defines class methods on the host class plus
  # instance-level wrappers that feed the host's own +http_user_agent+ value
  # into those class methods. Relies on the top-level Agent and SearchBot
  # classes.
  module HttpUserAgent

    # Hook invoked by Ruby when this module is included into +base+.
    #
    # Raises RuntimeError unless +base+ already defines an +http_user_agent+
    # instance method.
    def self.included(base) # :nodoc:
      base.class_eval do

        # True when SearchBot recognises +http_user_agent+ as a crawler.
        def self.search_bot?(http_user_agent)
          SearchBot.search_bot?(http_user_agent)
        end

        # Wraps +http_user_agent+ in a SearchBot when it is recognised as a
        # crawler, and in a plain Agent otherwise.
        def self.to_agent(http_user_agent)
          agent_class = self.search_bot?(http_user_agent) ? SearchBot : Agent
          agent_class.new(http_user_agent)
        end

        # Formats +http_user_agent+ as "(name, os)" using the wrapped agent.
        def self.to_agent_s(http_user_agent)
          agent = self.to_agent(http_user_agent)
          "(%s, %s)" % [agent.name, agent.os]
        end

        # this ensures the below module methods actually make sense...
        #
        # method_defined? is used because instance_methods returns symbols on
        # Ruby 1.9+, so a comparison against the string 'http_user_agent'
        # would never succeed there.
        unless base.method_defined?(:http_user_agent)
          raise "Class #{base.name} should implement instance method 'http_user_agent'"
        end

      end
    end

    # Instance-level wrappers: each one passes this object's own
    # +http_user_agent+ to the class method of the same name.

    def search_bot?
      self.class.search_bot?(self.http_user_agent)
    end

    def to_agent
      self.class.to_agent(self.http_user_agent)
    end

    def to_agent_s
      self.class.to_agent_s(self.http_user_agent)
    end

  end
end
121
+
122
+