ntail 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
module NginxTail
  #
  # Mixin that adds HTTP version formatting helpers to a log line class.
  #
  # Including it defines a class-level +to_http_version_s+ on the including
  # class, plus an instance-level +to_http_version_s+ that delegates to it.
  #
  # The including class must already define the instance method
  # +http_version+ when the module is included; otherwise a RuntimeError
  # is raised from the +included+ hook.
  #
  module HttpVersion

    def self.included(base) # :nodoc:
      base.class_eval do

        # Returns the given HTTP version, or an empty string when it is nil.
        def self.to_http_version_s(http_version)
          http_version || "" # will be nil if $request == "-" (ie. "dodgy" HTTP requests)
        end

        # this ensures the below module methods actually make sense...
        #
        # Module#method_defined? is used instead of instance_methods.include?
        # because instance_methods returns strings on Ruby 1.8 but symbols on
        # Ruby 1.9+, so a check against the string 'http_version' never
        # matches on modern Rubies and the include would always raise.
        raise "Class #{base.name} should implement instance method 'http_version'" unless base.method_defined?(:http_version)

      end
    end

    # Formats this object's own +http_version+ via the class-level helper.
    def to_http_version_s
      self.class.to_http_version_s(self.http_version)
    end

  end
end
@@ -0,0 +1,44 @@
1
module NginxTail
  module KnownIpAddresses

    #
    # known IP addresses, for filtering and formatting purposes
    #
    # e.g. office IP addresses, IP addresses of remote workers, ...
    #
    # The including class must already define the instance method
    # +remote_addr+ when the module is included; otherwise a RuntimeError
    # is raised from the +included+ hook.
    #

    def self.included(base) # :nodoc:
      base.class_eval do

        # (re-)initialised to an empty list every time the module is included
        @@known_ip_addresses = []

        # mainly (solely?) for testing purposes...
        # returns a copy, so callers cannot modify the internal list
        def self.known_ip_addresses()
          @@known_ip_addresses.dup
        end

        # mainly (solely?) for testing purposes...
        # empties the list in place and returns nil
        def self.reset_known_ip_addresses()
          @@known_ip_addresses.clear
          nil
        end

        # Registers an address; duplicates are dropped in place. The return
        # value is that of Array#uniq! (nil when no duplicate was removed).
        def self.add_known_ip_address(known_ip_address)
          (@@known_ip_addresses << known_ip_address).uniq!
        end

        # true when the given address has been registered as known
        def self.known_ip_address?(remote_addr)
          @@known_ip_addresses.include?(remote_addr)
        end

        # this ensures the below module methods actually make sense...
        #
        # Module#method_defined? is used instead of instance_methods.include?
        # because instance_methods returns strings on Ruby 1.8 but symbols on
        # Ruby 1.9+, so a check against the string 'remote_addr' never
        # matches on modern Rubies and the include would always raise.
        raise "Class #{base.name} should implement instance method 'remote_addr'" unless base.method_defined?(:remote_addr)

      end
    end

    # true when this object's own +remote_addr+ is a known address
    def known_ip_address?
      self.class.known_ip_address?(self.remote_addr)
    end

  end
end
@@ -0,0 +1,44 @@
1
module NginxTail
  module LocalIpAddresses

    #
    # local IP addresses, for filtering and formatting purposes
    #
    # e.g. downstream proxy servers (nginx web servers -> passenger app servers)
    #
    # The including class must already define the instance method
    # +remote_addr+ when the module is included; otherwise a RuntimeError
    # is raised from the +included+ hook.
    #

    def self.included(base) # :nodoc:
      base.class_eval do

        # (re-)initialised to an empty list every time the module is included
        @@local_ip_addresses = []

        # mainly (solely?) for testing purposes...
        # returns a copy, so callers cannot modify the internal list
        def self.local_ip_addresses()
          @@local_ip_addresses.dup
        end

        # mainly (solely?) for testing purposes...
        # empties the list in place and returns nil
        def self.reset_local_ip_addresses()
          @@local_ip_addresses.clear
          nil
        end

        # Registers an address; duplicates are dropped in place. The return
        # value is that of Array#uniq! (nil when no duplicate was removed).
        def self.add_local_ip_address(local_ip_address)
          (@@local_ip_addresses << local_ip_address).uniq!
        end

        # true when the given address has been registered as local
        def self.local_ip_address?(remote_addr)
          @@local_ip_addresses.include?(remote_addr)
        end

        # this ensures the below module methods actually make sense...
        #
        # Module#method_defined? is used instead of instance_methods.include?
        # because instance_methods returns strings on Ruby 1.8 but symbols on
        # Ruby 1.9+, so a check against the string 'remote_addr' never
        # matches on modern Rubies and the include would always raise.
        raise "Class #{base.name} should implement instance method 'remote_addr'" unless base.method_defined?(:remote_addr)

      end
    end

    # true when this object's own +remote_addr+ is a local address
    def local_ip_address?
      self.class.local_ip_address?(self.remote_addr)
    end

  end
end
@@ -1,36 +1,54 @@
1
- require 'date'
2
- require 'socket'
3
1
  require 'net/http'
4
2
 
5
3
  require 'rubygems'
6
4
  require 'rainbow'
7
- require 'user-agent'
8
-
9
- begin
10
- require 'geoip'
11
- rescue
12
- # NOOP (optional dependency)
13
- end
14
5
 
15
6
  module NginxTail
16
7
  class LogLine
17
8
 
18
- attr_accessor :raw_line
19
- attr_accessor :parsable
9
+ def self.component_to_module_name(component)
10
+ # this mimics the ActiveSupport::Inflector.camelize() method in Rails...
11
+ component.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
12
+ end
13
+
14
+ def self.component_to_ntail_module(component)
15
+ # this mimics the ActiveSupport::Inflector.constantize() method in Rails...
16
+ NginxTail.const_get(self.component_to_module_name(component))
17
+ end
18
+
19
+ attr_reader :raw_line
20
+ attr_reader :parsable
20
21
 
21
- attr_accessor :remote_address
22
- attr_accessor :remote_user
23
- attr_accessor :time_local
24
- attr_accessor :request
25
- attr_accessor :status
26
- attr_accessor :body_bytes_sent
27
- attr_accessor :http_referer
28
- attr_accessor :http_user_agent
29
- attr_accessor :proxy_addresses
30
-
31
- attr_accessor :http_method
32
- attr_accessor :uri
33
- attr_accessor :http_version
22
+ COMPONENTS = [
23
+ :remote_addr,
24
+ :remote_user,
25
+ :time_local,
26
+ :request,
27
+ :status,
28
+ :body_bytes_sent,
29
+ :http_referer,
30
+ :http_user_agent,
31
+ :proxy_addresses,
32
+ ]
33
+
34
+ COMPONENTS.each do |symbol|
35
+ attr_reader symbol
36
+ include component_to_ntail_module(symbol)
37
+ end
38
+
39
+ include KnownIpAddresses # module to identify known IP addresses
40
+ include LocalIpAddresses # module to identify local IP addresses
41
+
42
+ SUBCOMPONENTS = [
43
+ :http_method,
44
+ :uri,
45
+ :http_version,
46
+ ]
47
+
48
+ SUBCOMPONENTS.each do |symbol|
49
+ attr_reader symbol
50
+ include component_to_ntail_module(symbol)
51
+ end
34
52
 
35
53
  #
36
54
  # http://wiki.nginx.org/NginxHttpLogModule#log_format - we currently only support the default "combined" log format...
@@ -42,7 +60,7 @@ module NginxTail
42
60
 
43
61
  def initialize(line)
44
62
  @parsable = if NGINX_LOG_PATTERN.match(@raw_line = line)
45
- @remote_address, @remote_user, @time_local, @request, @status, @body_bytes_sent, @http_referer, @http_user_agent, @proxy_addresses = $~.captures
63
+ @remote_addr, @remote_user, @time_local, @request, @status, @body_bytes_sent, @http_referer, @http_user_agent, @proxy_addresses = $~.captures
46
64
  if NGINX_REQUEST_PATTERN.match(@request)
47
65
  # counter example (ie. HTTP request that cannot be parsed)
48
66
  # 91.203.96.51 - - [21/Dec/2010:05:26:53 +0000] "-" 400 0 "-" "-"
@@ -57,6 +75,8 @@ module NginxTail
57
75
  end
58
76
  end
59
77
 
78
+ alias_method :remote_address, :remote_addr # a non-abbreviated alias, for convenience and readability...
79
+
60
80
  # for now, until we make it fancier...
61
81
  def method_missing(method, *params)
62
82
  raw_line.send method, *params
@@ -72,52 +92,50 @@ module NginxTail
72
92
  else
73
93
  :default
74
94
  end
75
- "%s - %#{Sickill::Rainbow.enabled ? 15 + 9 : 15}s - %s - %s - %s" % [
76
- to_date.strftime("%Y-%m-%d %X").foreground(color),
95
+ "%s - %#{Sickill::Rainbow.enabled ? 15 + 9 : 15}s - %s - %s - %s - %s" % [
96
+ to_date_s.foreground(color),
77
97
  remote_address.foreground(color),
78
98
  status.foreground(color),
79
- (uri || "-").foreground(color),
80
- to_agent_s.foreground(color)
99
+ to_request_s.foreground(color),
100
+ to_agent_s.foreground(color),
101
+ to_referer_s.foreground(color).inverse
81
102
  ]
82
103
  end
83
-
84
- COMPONENTS = [
85
-
86
- :remote_address,
87
- :remote_user,
88
- :time_local,
89
- :request,
90
- :status,
91
- :body_bytes_sent,
92
- :http_referer,
93
- :http_user_agent,
94
- :proxy_addresses,
95
-
96
- ]
97
-
98
- SUBCOMPONENTS = [
99
-
100
- :http_method,
101
- :uri,
102
- :http_version,
103
-
104
- ]
105
104
 
106
105
  CONVERSIONS = [
107
106
 
108
107
  :to_date,
108
+ :to_date_s,
109
+
109
110
  :to_agent,
111
+ :to_agent_s,
110
112
 
111
113
  :to_host_name,
112
- :to_country,
113
- :to_city,
114
+ :to_refering_website,
115
+
116
+ :to_country_s,
117
+ :to_city_s,
114
118
 
115
119
  ]
116
120
 
117
- def self.log_subcomponent?(subcomponent) SUBCOMPONENTS.include?(subcomponent) ; end # TODO replace with some clever meta-programming...
118
- def self.log_component?(component) COMPONENTS.include?(component) ; end # TODO replace with some clever meta-programming...
119
- def self.log_conversion?(conversion) CONVERSIONS.include?(conversion) ; end # TODO replace with some clever meta-programming...
120
- def self.log_directive?(directive) (directive == :full) or log_conversion?(directive) or log_component?(directive) or log_subcomponent?(directive) ; end
121
+ def self.log_subcomponent?(subcomponent)
122
+ # TODO replace with some clever meta-programming...
123
+ SUBCOMPONENTS.include?(subcomponent)
124
+ end
125
+
126
+ def self.log_component?(component)
127
+ # TODO replace with some clever meta-programming...
128
+ COMPONENTS.include?(component)
129
+ end
130
+
131
+ def self.log_conversion?(conversion)
132
+ # TODO replace with some clever meta-programming...
133
+ CONVERSIONS.include?(conversion)
134
+ end
135
+
136
+ def self.log_directive?(directive)
137
+ (directive == :full) or log_conversion?(directive) or log_component?(directive) or log_subcomponent?(directive)
138
+ end
121
139
 
122
140
  #
123
141
  # extraction filters for log line components
@@ -165,148 +183,6 @@ module NginxTail
165
183
  def self.valid_referer?(referer) true ; end
166
184
  def self.valid_user_agent?(user_agent) true ; end
167
185
 
168
- #
169
- # conversion of log line components
170
- #
171
-
172
- # >> DateTime.strptime("13/Apr/2010:04:45:51 +0100", '%d/%b/%Y:%T %z').to_s
173
- # => "2010-04-13T04:45:51+01:00"
174
- # >> DateTime.strptime("13/Apr/2010:04:45:51 +0100", '%d/%b/%Y:%H:%M:%S %z').to_s
175
- # => "2010-04-13T04:45:51+01:00"
176
- # >> _
177
-
178
- def to_date() DateTime.strptime(self.time_local, '%d/%b/%Y:%T %z') ; end
179
-
180
- class SearchBot < Agent
181
- attr_accessor :name
182
- attr_accessor :os
183
- def initialize(string)
184
- super string
185
- @name = self.class.name_for_user_agent(string)
186
- @os = self.class.os_for_user_agent(string)
187
- end
188
- def self.name_for_user_agent string
189
- case string
190
- when GOOGLE_BOT then "googlebot"
191
- when MSN_BOT then "msnbot"
192
- when YAHOO_BOT then "yahoo_slurp"
193
- when ALEXA_BOT then "ia_archiver"
194
- when PINGDOM_BOT then "pingdom_bot"
195
- when YANDEX_BOT then "yandex_bot"
196
- else super(string)
197
- end
198
- end
199
- def self.os_for_user_agent string
200
- case string
201
- when GOOGLE_BOT then "google.com"
202
- when MSN_BOT then "msn.com"
203
- when YAHOO_BOT then "yahoo.com"
204
- when ALEXA_BOT then "alexa.com"
205
- when PINGDOM_BOT then "pingdom.com"
206
- when YANDEX_BOT then "yandex.com"
207
- else super(string)
208
- end
209
- end
210
- end
211
-
212
- def to_agent()
213
- if known_search_bot?
214
- SearchBot.new(self.http_user_agent)
215
- else
216
- Agent.new(self.http_user_agent)
217
- end
218
- end
219
-
220
- def to_agent_s()
221
- agent = self.to_agent ; "(%s, %s)" % [agent.name, agent.os]
222
- end
223
-
224
- def to_host_name()
225
- Socket::getaddrinfo(self.remote_address,nil)[0][2]
226
- end
227
-
228
- if defined? GeoIP # ie. if the optional GeoIP gem is installed
229
-
230
- if File.exists?('/usr/share/GeoIP/GeoIP.dat')
231
- def to_country()
232
- record = GeoIP.new('/usr/share/GeoIP/GeoIP.dat').country(self.remote_address) ; record ? record[5] : 'N/A'
233
- end
234
- end
235
-
236
- if File.exists?('/usr/share/GeoIP/GeoIPCity.dat')
237
- def to_city()
238
- record = GeoIP.new('/usr/share/GeoIP/GeoIPCity.dat').city(self.remote_address) ; record ? record[7] : 'N/A'
239
- end
240
- end
241
-
242
- end
243
-
244
- #
245
- # downstream proxy servers
246
- #
247
-
248
- PROXY_IP_ADDRESSES = %w{
249
- 192.168.0.2
250
- 192.168.0.3
251
- 192.168.0.4
252
- }
253
-
254
- def self.proxy_ip_address?(remote_address) PROXY_IP_ADDRESSES.include?(remote_address) ; end
255
- def proxy_ip_address?() self.class.proxy_ip_address?(self.remote_address) ; end
256
-
257
- #
258
- # known IP addresses, for filtering purposes
259
- #
260
-
261
- OFFICE_IP_ADDRESSES = %w{
262
- }
263
-
264
- def self.office_ip_address?(remote_address) OFFICE_IP_ADDRESSES.include?(remote_address) ; end
265
- def office_ip_address?() self.class.office_ip_address?(self.remote_address) ; end
266
-
267
- #
268
- # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
269
- # Googlebot-Image/1.0
270
- # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
271
- # msnbot/2.0b (+http://search.msn.com/msnbot.htm).
272
- # msnbot/2.0b (+http://search.msn.com/msnbot.htm)._
273
- # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
274
- # Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)
275
- # ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)
276
- # Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
277
- #
278
-
279
- KNOWN_SEARCH_BOTS = [
280
- GOOGLE_BOT = Regexp.compile('Googlebot.*\/'),
281
- MSN_BOT = Regexp.compile('msnbot\/'),
282
- YAHOO_BOT = Regexp.compile('Yahoo! Slurp\/?'),
283
- PINGDOM_BOT = Regexp.compile('Pingdom.com_bot_version_'),
284
- ALEXA_BOT = Regexp.compile('ia_archiver'),
285
- YANDEX_BOT = Regexp.compile('YandexBot\/'),
286
- nil
287
- ].compact!
288
-
289
- def self.known_search_bot?(user_agent) !KNOWN_SEARCH_BOTS.detect { |bot| bot.match(user_agent) }.nil? end
290
- def known_search_bot?() self.class.known_search_bot?(self.http_user_agent) ; end
291
-
292
- #
293
- # mainly to easily identify external referers, for filtering purposes
294
- #
295
-
296
- INTERNAL_REFERERS = [
297
- Regexp.compile('^http://(www\.)?MY_WEBSITE_NAME\.com'),
298
- Regexp.compile('^-$'),
299
- ]
300
-
301
- def self.internal_referer?(http_referer) !INTERNAL_REFERERS.detect { |referer| referer.match(http_referer) }.nil? end
302
- def internal_referer?() self.class.internal_referer?(self.http_referer) ; end
303
-
304
- def self.external_referer?(http_referer) !self.internal_referer?(http_referer) ; end
305
- def external_referer?() self.class.external_referer?(self.http_referer) ; end
306
-
307
- def self.authenticated_user?(remote_user) remote_user and remote_user != "-" ; end
308
- def authenticated_user?() self.class.authenticated_user?(self.remote_user) ; end
309
-
310
186
  #
311
187
  # "GET /xd_receiver.html HTTP/1.1"
312
188
  # "GET /crossdomain.xml HTTP/1.1"
@@ -352,28 +228,5 @@ module NginxTail
352
228
  def self.static_request?(request) !STATIC_REQUESTS.detect { |static_request_regexp| request.match(static_request_regexp) }.nil? end
353
229
  def static_request?() self.class.static_request?(self.request) ; end
354
230
 
355
- NGINX_MAGIC_STATUS = '499' # ex-standard HTTP response code specific to nginx, in addition to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
356
- UNPROCESSABLE_ENTITY = '422' # not supported by 'net/http' (Net::HTTPResponse::CODE_TO_OBJ["422"] == nil), see also: http://www.ruby-forum.com/topic/98002
357
-
358
- # Informational 1xx
359
- def self.information_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPInformation ; end
360
- def information_status?() self.class.information_status?(self.status) ; end
361
-
362
- # Successful 2xx
363
- def self.success_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPSuccess ; end
364
- def success_status?() self.class.success_status?(self.status) ; end
365
-
366
- # Redirection 3xx
367
- def self.redirect_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPRedirection ; end
368
- def redirect_status?() self.class.redirect_status?(self.status) ; end
369
-
370
- # Client Error 4xx
371
- def self.client_error_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPClientError ; end
372
- def client_error_status?() self.class.client_error_status?(self.status) ; end
373
-
374
- # Internal Server Error 5xx
375
- def self.server_error_status?(status) (status.to_s != NGINX_MAGIC_STATUS) and Net::HTTPResponse::CODE_TO_OBJ[status.to_s] <= Net::HTTPServerError ; end
376
- def server_error_status?() self.class.server_error_status?(self.status) ; end
377
-
378
231
  end # class LogLine
379
232
  end # module NginxTail