ntail 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
module NginxTail
  #
  # Mixin that adds HTTP version formatting helpers to a class.
  #
  # The including class must already define an instance method 'http_version'
  # at the point where this module is included; otherwise a RuntimeError is raised.
  #
  module HttpVersion

    def self.included(base) # :nodoc:
      # this ensures the below module methods actually make sense...
      #
      # Module#method_defined? is used instead of instance_methods.include?('...'),
      # because instance_methods returns Symbols on Ruby 1.9+ (Strings on 1.8), so a
      # String comparison never matches there and the guard would always raise.
      unless base.method_defined?(:http_version)
        raise "Class #{base.name} should implement instance method 'http_version'"
      end

      base.class_eval do

        # Class-level formatter: returns the given HTTP version unchanged,
        # or an empty string when it is nil.
        def self.to_http_version_s(http_version)
          http_version || "" # will be nil if $request == "-" (ie. "dodgy" HTTP requests)
        end

      end
    end

    # Instance-level convenience: formats this object's own http_version
    # by delegating to the class-level formatter.
    def to_http_version_s
      self.class.to_http_version_s(self.http_version)
    end

  end
end
@@ -0,0 +1,44 @@
module NginxTail
  module KnownIpAddresses

    #
    # known IP addresses, for filtering and formatting purposes
    #
    # e.g. office IP addresses, IP addresses of remote workers, ...
    #
    # The including class must already define an instance method 'remote_addr'
    # at the point where this module is included; otherwise a RuntimeError is raised.
    #

    # The registry is a class variable of this module: class variables referenced
    # inside the class_eval block below resolve lexically to the module, so the list
    # is shared by every class that includes it. It is initialised once here (not
    # inside the included hook), so that including the module into a further class
    # does not wipe addresses that were already registered.
    @@known_ip_addresses = []

    def self.included(base) # :nodoc:
      # this ensures the below module methods actually make sense...
      #
      # Module#method_defined? is used instead of instance_methods.include?('...'),
      # because instance_methods returns Symbols on Ruby 1.9+ (Strings on 1.8), so a
      # String comparison never matches there and the guard would always raise.
      unless base.method_defined?(:remote_addr)
        raise "Class #{base.name} should implement instance method 'remote_addr'"
      end

      base.class_eval do

        # mainly (solely?) for testing purposes...
        # Returns a copy, so callers cannot mutate the registry through it.
        def self.known_ip_addresses()
          @@known_ip_addresses.dup
        end

        # mainly (solely?) for testing purposes...
        # Empties the registry in place; returns nil.
        def self.reset_known_ip_addresses()
          @@known_ip_addresses.clear
          nil # do not hand out the internal array
        end

        # Registers an address; an address that is already present is not stored twice.
        def self.add_known_ip_address(known_ip_address)
          (@@known_ip_addresses << known_ip_address).uniq!
        end

        # True when the given address has been registered.
        def self.known_ip_address?(remote_addr)
          @@known_ip_addresses.include?(remote_addr)
        end

      end
    end

    # True when this object's own remote_addr has been registered.
    def known_ip_address?
      self.class.known_ip_address?(self.remote_addr)
    end

  end
end
@@ -0,0 +1,44 @@
module NginxTail
  module LocalIpAddresses

    #
    # local IP addresses, for filtering and formatting purposes
    #
    # e.g. downstream proxy servers (nginx web servers -> passenger app servers)
    #
    # The including class must already define an instance method 'remote_addr'
    # at the point where this module is included; otherwise a RuntimeError is raised.
    #

    # The registry is a class variable of this module: class variables referenced
    # inside the class_eval block below resolve lexically to the module, so the list
    # is shared by every class that includes it. It is initialised once here (not
    # inside the included hook), so that including the module into a further class
    # does not wipe addresses that were already registered.
    @@local_ip_addresses = []

    def self.included(base) # :nodoc:
      # this ensures the below module methods actually make sense...
      #
      # Module#method_defined? is used instead of instance_methods.include?('...'),
      # because instance_methods returns Symbols on Ruby 1.9+ (Strings on 1.8), so a
      # String comparison never matches there and the guard would always raise.
      unless base.method_defined?(:remote_addr)
        raise "Class #{base.name} should implement instance method 'remote_addr'"
      end

      base.class_eval do

        # mainly (solely?) for testing purposes...
        # Returns a copy, so callers cannot mutate the registry through it.
        def self.local_ip_addresses()
          @@local_ip_addresses.dup
        end

        # mainly (solely?) for testing purposes...
        # Empties the registry in place; returns nil.
        def self.reset_local_ip_addresses()
          @@local_ip_addresses.clear
          nil # do not hand out the internal array
        end

        # Registers an address; an address that is already present is not stored twice.
        def self.add_local_ip_address(local_ip_address)
          (@@local_ip_addresses << local_ip_address).uniq!
        end

        # True when the given address has been registered.
        def self.local_ip_address?(remote_addr)
          @@local_ip_addresses.include?(remote_addr)
        end

      end
    end

    # True when this object's own remote_addr has been registered.
    def local_ip_address?
      self.class.local_ip_address?(self.remote_addr)
    end

  end
end
@@ -1,36 +1,54 @@
1
- require 'date'
2
- require 'socket'
3
1
  require 'net/http'
4
2
 
5
3
  require 'rubygems'
6
4
  require 'rainbow'
7
- require 'user-agent'
8
-
9
- begin
10
- require 'geoip'
11
- rescue
12
- # NOOP (optional dependency)
13
- end
14
5
 
15
6
  module NginxTail
16
7
  class LogLine
17
8
 
18
- attr_accessor :raw_line
19
- attr_accessor :parsable
9
+ def self.component_to_module_name(component)
10
+ # this mimics the ActiveSupport::Inflector.camelize() method in Rails...
11
+ component.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
12
+ end
13
+
14
+ def self.component_to_ntail_module(component)
15
+ # this mimics the ActiveSupport::Inflector.constantize() method in Rails...
16
+ NginxTail.const_get(self.component_to_module_name(component))
17
+ end
18
+
19
+ attr_reader :raw_line
20
+ attr_reader :parsable
20
21
 
21
- attr_accessor :remote_address
22
- attr_accessor :remote_user
23
- attr_accessor :time_local
24
- attr_accessor :request
25
- attr_accessor :status
26
- attr_accessor :body_bytes_sent
27
- attr_accessor :http_referer
28
- attr_accessor :http_user_agent
29
- attr_accessor :proxy_addresses
30
-
31
- attr_accessor :http_method
32
- attr_accessor :uri
33
- attr_accessor :http_version
22
+ COMPONENTS = [
23
+ :remote_addr,
24
+ :remote_user,
25
+ :time_local,
26
+ :request,
27
+ :status,
28
+ :body_bytes_sent,
29
+ :http_referer,
30
+ :http_user_agent,
31
+ :proxy_addresses,
32
+ ]
33
+
34
+ COMPONENTS.each do |symbol|
35
+ attr_reader symbol
36
+ include component_to_ntail_module(symbol)
37
+ end
38
+
39
+ include KnownIpAddresses # module to identify known IP addresses
40
+ include LocalIpAddresses # module to identify local IP addresses
41
+
42
+ SUBCOMPONENTS = [
43
+ :http_method,
44
+ :uri,
45
+ :http_version,
46
+ ]
47
+
48
+ SUBCOMPONENTS.each do |symbol|
49
+ attr_reader symbol
50
+ include component_to_ntail_module(symbol)
51
+ end
34
52
 
35
53
  #
36
54
  # http://wiki.nginx.org/NginxHttpLogModule#log_format - we currently only support the default "combined" log format...
@@ -42,7 +60,7 @@ module NginxTail
42
60
 
43
61
  def initialize(line)
44
62
  @parsable = if NGINX_LOG_PATTERN.match(@raw_line = line)
45
- @remote_address, @remote_user, @time_local, @request, @status, @body_bytes_sent, @http_referer, @http_user_agent, @proxy_addresses = $~.captures
63
+ @remote_addr, @remote_user, @time_local, @request, @status, @body_bytes_sent, @http_referer, @http_user_agent, @proxy_addresses = $~.captures
46
64
  if NGINX_REQUEST_PATTERN.match(@request)
47
65
  # counter example (ie. HTTP request that cannot be parsed)
48
66
  # 91.203.96.51 - - [21/Dec/2010:05:26:53 +0000] "-" 400 0 "-" "-"
@@ -57,6 +75,8 @@ module NginxTail
57
75
  end
58
76
  end
59
77
 
78
+ alias_method :remote_address, :remote_addr # a non-abbreviated alias, for convenience and readability...
79
+
60
80
  # for now, until we make it fancier...
61
81
  def method_missing(method, *params)
62
82
  raw_line.send method, *params
@@ -72,52 +92,50 @@ module NginxTail
72
92
  else
73
93
  :default
74
94
  end
75
- "%s - %#{Sickill::Rainbow.enabled ? 15 + 9 : 15}s - %s - %s - %s" % [
76
- to_date.strftime("%Y-%m-%d %X").foreground(color),
95
+ "%s - %#{Sickill::Rainbow.enabled ? 15 + 9 : 15}s - %s - %s - %s - %s" % [
96
+ to_date_s.foreground(color),
77
97
  remote_address.foreground(color),
78
98
  status.foreground(color),
79
- (uri || "-").foreground(color),
80
- to_agent_s.foreground(color)
99
+ to_request_s.foreground(color),
100
+ to_agent_s.foreground(color),
101
+ to_referer_s.foreground(color).inverse
81
102
  ]
82
103
  end
83
-
84
- COMPONENTS = [
85
-
86
- :remote_address,
87
- :remote_user,
88
- :time_local,
89
- :request,
90
- :status,
91
- :body_bytes_sent,
92
- :http_referer,
93
- :http_user_agent,
94
- :proxy_addresses,
95
-
96
- ]
97
-
98
- SUBCOMPONENTS = [
99
-
100
- :http_method,
101
- :uri,
102
- :http_version,
103
-
104
- ]
105
104
 
106
105
  CONVERSIONS = [
107
106
 
108
107
  :to_date,
108
+ :to_date_s,
109
+
109
110
  :to_agent,
111
+ :to_agent_s,
110
112
 
111
113
  :to_host_name,
112
- :to_country,
113
- :to_city,
114
+ :to_refering_website,
115
+
116
+ :to_country_s,
117
+ :to_city_s,
114
118
 
115
119
  ]
116
120
 
117
- def self.log_subcomponent?(subcomponent) SUBCOMPONENTS.include?(subcomponent) ; end # TODO replace with some clever meta-programming...
118
- def self.log_component?(component) COMPONENTS.include?(component) ; end # TODO replace with some clever meta-programming...
119
- def self.log_conversion?(conversion) CONVERSIONS.include?(conversion) ; end # TODO replace with some clever meta-programming...
120
- def self.log_directive?(directive) (directive == :full) or log_conversion?(directive) or log_component?(directive) or log_subcomponent?(directive) ; end
121
+ def self.log_subcomponent?(subcomponent)
122
+ # TODO replace with some clever meta-programming...
123
+ SUBCOMPONENTS.include?(subcomponent)
124
+ end
125
+
126
+ def self.log_component?(component)
127
+ # TODO replace with some clever meta-programming...
128
+ COMPONENTS.include?(component)
129
+ end
130
+
131
+ def self.log_conversion?(conversion)
132
+ # TODO replace with some clever meta-programming...
133
+ CONVERSIONS.include?(conversion)
134
+ end
135
+
136
+ def self.log_directive?(directive)
137
+ (directive == :full) or log_conversion?(directive) or log_component?(directive) or log_subcomponent?(directive)
138
+ end
121
139
 
122
140
  #
123
141
  # extraction filters for log line components
@@ -165,148 +183,6 @@ module NginxTail
165
183
  def self.valid_referer?(referer) true ; end
166
184
  def self.valid_user_agent?(user_agent) true ; end
167
185
 
168
- #
169
- # conversion of log line components
170
- #
171
-
172
- # >> DateTime.strptime("13/Apr/2010:04:45:51 +0100", '%d/%b/%Y:%T %z').to_s
173
- # => "2010-04-13T04:45:51+01:00"
174
- # >> DateTime.strptime("13/Apr/2010:04:45:51 +0100", '%d/%b/%Y:%H:%M:%S %z').to_s
175
- # => "2010-04-13T04:45:51+01:00"
176
- # >> _
177
-
178
- def to_date() DateTime.strptime(self.time_local, '%d/%b/%Y:%T %z') ; end
179
-
180
- class SearchBot < Agent
181
- attr_accessor :name
182
- attr_accessor :os
183
- def initialize(string)
184
- super string
185
- @name = self.class.name_for_user_agent(string)
186
- @os = self.class.os_for_user_agent(string)
187
- end
188
- def self.name_for_user_agent string
189
- case string
190
- when GOOGLE_BOT then "googlebot"
191
- when MSN_BOT then "msnbot"
192
- when YAHOO_BOT then "yahoo_slurp"
193
- when ALEXA_BOT then "ia_archiver"
194
- when PINGDOM_BOT then "pingdom_bot"
195
- when YANDEX_BOT then "yandex_bot"
196
- else super(string)
197
- end
198
- end
199
- def self.os_for_user_agent string
200
- case string
201
- when GOOGLE_BOT then "google.com"
202
- when MSN_BOT then "msn.com"
203
- when YAHOO_BOT then "yahoo.com"
204
- when ALEXA_BOT then "alexa.com"
205
- when PINGDOM_BOT then "pingdom.com"
206
- when YANDEX_BOT then "yandex.com"
207
- else super(string)
208
- end
209
- end
210
- end
211
-
212
- def to_agent()
213
- if known_search_bot?
214
- SearchBot.new(self.http_user_agent)
215
- else
216
- Agent.new(self.http_user_agent)
217
- end
218
- end
219
-
220
- def to_agent_s()
221
- agent = self.to_agent ; "(%s, %s)" % [agent.name, agent.os]
222
- end
223
-
224
- def to_host_name()
225
- Socket::getaddrinfo(self.remote_address,nil)[0][2]
226
- end
227
-
228
- if defined? GeoIP # ie. if the optional GeoIP gem is installed
229
-
230
- if File.exists?('/usr/share/GeoIP/GeoIP.dat')
231
- def to_country()
232
- record = GeoIP.new('/usr/share/GeoIP/GeoIP.dat').country(self.remote_address) ; record ? record[5] : 'N/A'
233
- end
234
- end
235
-
236
- if File.exists?('/usr/share/GeoIP/GeoIPCity.dat')
237
- def to_city()
238
- record = GeoIP.new('/usr/share/GeoIP/GeoIPCity.dat').city(self.remote_address) ; record ? record[7] : 'N/A'
239
- end
240
- end
241
-
242
- end
243
-
244
- #
245
- # downstream proxy servers
246
- #
247
-
248
- PROXY_IP_ADDRESSES = %w{
249
- 192.168.0.2
250
- 192.168.0.3
251
- 192.168.0.4
252
- }
253
-
254
- def self.proxy_ip_address?(remote_address) PROXY_IP_ADDRESSES.include?(remote_address) ; end
255
- def proxy_ip_address?() self.class.proxy_ip_address?(self.remote_address) ; end
256
-
257
- #
258
- # known IP addresses, for filtering purposes
259
- #
260
-
261
- OFFICE_IP_ADDRESSES = %w{
262
- }
263
-
264
- def self.office_ip_address?(remote_address) OFFICE_IP_ADDRESSES.include?(remote_address) ; end
265
- def office_ip_address?() self.class.office_ip_address?(self.remote_address) ; end
266
-
267
- #
268
- # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
269
- # Googlebot-Image/1.0
270
- # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
271
- # msnbot/2.0b (+http://search.msn.com/msnbot.htm).
272
- # msnbot/2.0b (+http://search.msn.com/msnbot.htm)._
273
- # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
274
- # Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)
275
- # ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)
276
- # Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
277
- #
278
-
279
- KNOWN_SEARCH_BOTS = [
280
- GOOGLE_BOT = Regexp.compile('Googlebot.*\/'),
281
- MSN_BOT = Regexp.compile('msnbot\/'),
282
- YAHOO_BOT = Regexp.compile('Yahoo! Slurp\/?'),
283
- PINGDOM_BOT = Regexp.compile('Pingdom.com_bot_version_'),
284
- ALEXA_BOT = Regexp.compile('ia_archiver'),
285
- YANDEX_BOT = Regexp.compile('YandexBot\/'),
286
- nil
287
- ].compact!
288
-
289
- def self.known_search_bot?(user_agent) !KNOWN_SEARCH_BOTS.detect { |bot| bot.match(user_agent) }.nil? end
290
- def known_search_bot?() self.class.known_search_bot?(self.http_user_agent) ; end
291
-
292
- #
293
- # mainly to easily identify external referers, for filtering purposes
294
- #
295
-
296
- INTERNAL_REFERERS = [
297
- Regexp.compile('^http://(www\.)?MY_WEBSITE_NAME\.com'),
298
- Regexp.compile('^-$'),
299
- ]
300
-
301
- def self.internal_referer?(http_referer) !INTERNAL_REFERERS.detect { |referer| referer.match(http_referer) }.nil? end
302
- def internal_referer?() self.class.internal_referer?(self.http_referer) ; end
303
-
304
- def self.external_referer?(http_referer) !self.internal_referer?(http_referer) ; end
305
- def external_referer?() self.class.external_referer?(self.http_referer) ; end
306
-
307
- def self.authenticated_user?(remote_user) remote_user and remote_user != "-" ; end
308
- def authenticated_user?() self.class.authenticated_user?(self.remote_user) ; end
309
-
310
186
  #
311
187
  # "GET /xd_receiver.html HTTP/1.1"
312
188
  # "GET /crossdomain.xml HTTP/1.1"
@@ -352,28 +228,5 @@ module NginxTail
352
228
  def self.static_request?(request) !STATIC_REQUESTS.detect { |static_request_regexp| request.match(static_request_regexp) }.nil? end
353
229
  def static_request?() self.class.static_request?(self.request) ; end
354
230
 
355
- NGINX_MAGIC_STATUS = '499' # ex-standard HTTP response code specific to nginx, in addition to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
356
- UNPROCESSABLE_ENTITY = '422' # not supported by 'net/http' (Net::HTTPResponse::CODE_TO_OBJ["422"] == nil), see also: http://www.ruby-forum.com/topic/98002
357
-
358
- # Informational 1xx
359
- def self.information_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPInformation ; end
360
- def information_status?() self.class.information_status?(self.status) ; end
361
-
362
- # Successful 2xx
363
- def self.success_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPSuccess ; end
364
- def success_status?() self.class.success_status?(self.status) ; end
365
-
366
- # Redirection 3xx
367
- def self.redirect_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPRedirection ; end
368
- def redirect_status?() self.class.redirect_status?(self.status) ; end
369
-
370
- # Client Error 4xx
371
- def self.client_error_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPClientError ; end
372
- def client_error_status?() self.class.client_error_status?(self.status) ; end
373
-
374
- # Internal Server Error 5xx
375
- def self.server_error_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPServerError ; end
376
- def server_error_status?() self.class.server_error_status?(self.status) ; end
377
-
378
231
  end # class LogLine
379
232
  end # module NginxTail