Aplo 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
+ #
2
+ # Comb - an Apache log analysis script.
3
+ #
4
+ # By Tim Fletcher <twoggle@gmail.com>
5
+ #
6
+ # Licensed under a Creative Commons Attribution-ShareAlike 2.5 License
7
+ # http://creativecommons.org/licenses/by-sa/2.5/
8
+ #
9
+ # Requirements:
10
+ #
11
+ # GeoIP <http://www.maxmind.com/download/geoip/api/c/>
12
+ # Lazy.rb <http://moonbase.rydia.net/software/lazy.rb/>
13
+ #
14
+ # Usage:
15
+ #
16
+ # ruby comb.rb relative/glob/of/access_log*
17
+ #
18
+ #
19
+ __DIR__ = File.dirname(__FILE__)
20
+
21
+ $:.unshift File.join(__DIR__, '..', 'lib')
22
+
23
+ require 'rubygems' rescue nil
24
+ require 'aplo'
25
+ require 'lazy'
26
+ require 'comb/geoip'
27
+ require 'comb/ascii_chart'
28
+ require 'comb/itself'
29
+
30
# Numeric helpers used by the Comb report.
class Numeric
  # Converts a byte count into megabytes (10**6 bytes).
  #
  # The result is always a Float: the report prints it with "%.1f", and
  # Integer division would silently discard the fractional megabytes.
  def MB
    self / 1_000_000.0
  end

  # Rounds the receiver to the nearest multiple of +n+; exact halves round up.
  #
  #   7.round_to_nearest(2)  # => 8
  #   12.round_to_nearest(5) # => 10
  def round_to_nearest(n)
    remainder = self % n
    self - remainder + (remainder < (n / 2.0) ? 0 : n)
  end
end
38
+
39
# Returns the arithmetic mean of the numbers in +enum+ as a Float.
#
# An empty collection yields 0.0 instead of raising, so a report over zero
# log files can still be printed. Accumulating from 0.0 also prevents Integer
# inputs from being truncated by integer division.
def mean(enum)
  return 0.0 if enum.size.zero?

  enum.inject(0.0) { |sum, value| sum + value } / enum.size
end
42
+
43
# Run the report only when this file is executed directly (not when required).
# The first command-line argument is a glob of log files; without it the
# default '../logs/access_log*' glob is used.
if $0 == __FILE__
  log_glob = ARGV[0] || '../logs/access_log*'

  Comb(
    :logs  => Dir[log_glob],
    :cache => File.join(__DIR__, 'comb', 'geoip.cache'),
    :top   => 10 # number of countries to chart
  )
end
@@ -0,0 +1,46 @@
1
# Renders (label, value) pairs as a horizontal ASCII bar chart in which each
# value is shown as a percentage of options[:max].
#
# Options:
#   :max         - the value that represents 100% (required once there is data)
#   :granularity - percentage points represented by one bar symbol (default 2)
#   :symbol      - string repeated to draw the bar (default '*')
#
# Labels are right-justified to the width of the longest label plus two.
# Rendering makes two passes over the data (measure, then draw), so no lazy
# evaluation library is needed and the options hash is never modified.
class AsciiPercentageChart

  DEFAULT_GRANULARITY = 2
  DEFAULT_SYMBOL      = '*'

  # data    - a Hash or an Array of [label, value] pairs.
  # options - see the class comment.
  def initialize(data, options = {})
    @data, @options = data, options
  end

  # Returns the chart as a String, one line per entry, joined with newlines.
  # An empty data set renders as an empty string.
  def to_s
    label_width = @data.map { |label, _| label.length }.max.to_i + 2

    @data.map { |label, value| line_for(label, value, label_width) }.join("\n")
  end


  private

  # Builds one chart line: "<label> : <bar> <value>, <percentage>%".
  def line_for(label, value, label_width)
    percentage = value.to_f * 100 / max
    bar_length = bar_length_for(percentage)

    # No separating space when the bar is empty, so the figures follow " : " directly.
    end_label = (bar_length == 0 ? '' : ' ') + ("%d, %.1f%%" % [ value, percentage ])

    "#{label.rjust(label_width)} : " + (symbol * bar_length) + end_label
  end

  # Number of symbols for +percentage+: the percentage rounded to the nearest
  # multiple of the granularity (halves round up), divided by the granularity.
  def bar_length_for(percentage)
    remainder = percentage % granularity
    rounded   = percentage - remainder + (remainder < (granularity / 2.0) ? 0 : granularity)

    (rounded / granularity).to_i
  end

  # The value representing 100%. Raises ArgumentError when it is missing or
  # zero, because no percentage can be computed against it.
  def max
    value = @options[:max]
    if value.nil? || value.to_f.zero?
      raise ArgumentError, "AsciiPercentageChart requires a non-zero :max option"
    end
    value
  end

  def granularity
    @options[:granularity] || DEFAULT_GRANULARITY
  end

  def symbol
    @options[:symbol] || DEFAULT_SYMBOL
  end

end
@@ -0,0 +1,40 @@
1
module GeoIP
  # A disk-backed memo of IP address => country name, filled on demand by
  # running the external `geoiplookup` command.
  #
  # c.f. http://eigenclass.org/hiki.rb?Hash+default+block
  class Cache

    # filename - path of the marshalled cache file; it does not need to exist.
    def initialize(filename)
      @filename = filename
      update!
    end

    # Returns the country name for +ip+ (nil when GeoIP does not know it),
    # looking it up and remembering the answer on first use.
    def [](ip)
      data[ip]
    end

    # Writes the cache to disk.
    #
    # A Hash with a default block cannot be marshalled, so a plain copy is
    # dumped; the live hash keeps its lookup block and stays usable afterwards.
    def save
      snapshot = {}.update(data)
      File.open(@filename, 'wb') { |f| Marshal.dump(snapshot, f) }
    end


    private

    def data
      @data ||= Hash.new { |hash, ip| hash[ip] = lookup(ip) }
    end

    # Runs `geoiplookup <ip>` and converts its output.
    #
    # The address comes from a log file, so it is passed as a separate argv
    # entry rather than interpolated into a shell command line.
    def lookup(ip)
      convert IO.popen(['geoiplookup', ip.to_s]) { |io| io.read }
    end

    # Extracts the country name from geoiplookup output such as
    # "GeoIP Country Edition: GB, United Kingdom"; returns nil for "--, N/A".
    def convert(str)
      str =~ /--, N\/A/ ? nil : str.sub(/^GeoIP Country Edition: /, '').sub(/[A-Z]+,/, '').strip
    end

    # Merges a previously saved cache file into the in-memory hash.
    #
    # Best effort: a missing, unreadable or corrupt file just means starting
    # with an empty cache.
    #
    # NOTE(review): Marshal.load must only ever see a file written by #save;
    # never point the cache at untrusted data.
    def update!
      cached = File.open(@filename, 'rb') { |f| Marshal.load(f) }
      data.update(cached) if cached.is_a?(Hash)
    rescue SystemCallError, IOError, ArgumentError, TypeError
      nil
    end

  end
end
@@ -0,0 +1,37 @@
1
# Parses the given Apache log files and prints a summary report to stdout:
# file count and size, timings, total hits, unique remote hosts, and a chart
# of the countries with the most hits.
#
# options:
#   :logs  - Array of log file paths
#   :cache - path of the GeoIP cache file (saved back after the run)
#   :top   - number of countries to include in the chart
def Comb(options)
  logs        = options[:logs]
  geoip_cache = GeoIP::Cache.new(options[:cache])

  seen_hosts     = {}
  hits           = 0
  country_counts = Hash.new { |counts, country| counts[country] = 0 }
  parse_times    = []
  started_at     = Time.now

  logs.each do |filename|
    parse_started = Time.now
    entries = Apache::Log.parse(File.read(filename))
    parse_times << (Time.now - parse_started)

    entries.each do |entry|
      hits += 1
      seen_hosts[entry.remote_host] = true
      country_counts[geoip_cache[entry.remote_host]] += 1
    end
  end

  time_taken = Time.now - started_at

  # Hosts GeoIP could not place are counted under nil; leave them out of the chart.
  country_counts.delete_if { |country, _| country.nil? }
  ranked        = country_counts.sort_by { |_, count| count }.reverse
  top_countries = ranked[0, options[:top]]

  throughput = 0
  logs.each { |path| throughput += File.size(path) }

  geoip_cache.save

  puts
  puts "> Processed #{logs.size} log files (%.1f Mb) in %.1f seconds" % [ throughput.MB, time_taken ]
  puts "> Average log parse time of %.3f seconds" % mean(parse_times)
  puts "> Total hits: %d" % hits
  puts "> Unique IPs: %d" % seen_hosts.size
  puts "> Top %d countries..." % options[:top]
  puts
  puts AsciiPercentageChart.new(top_countries, :max => hits, :symbol => '|')
  puts
end
@@ -0,0 +1,45 @@
1
+ # Aplo - A Ruby wrapper for Apache log files.
2
+ #
3
+ # Copyright (c) 2006 Tim Fletcher <twoggle@gmail.com>
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9
+ # of the Software, and to permit persons to whom the Software is furnished to
10
+ # do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
18
+ # OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19
+ # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
20
+ # IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ require 'aplo/directive'
23
+ require 'aplo/directives'
24
+ require 'aplo/entry'
25
+ require 'aplo/format'
26
+
27
module Apache

  # Ready-made formats, written in Apache LogFormat directive syntax.

  CommonLogFormat = Log::Format.new("%h %l %u %t \"%r\" %>s %b")

  CombinedLogFormat = Log::Format.new("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"")

  RefererLogFormat = Log::Format.new("%{Referer}i -> %U")

  AgentLogFormat = Log::Format.new("%{User-agent}i")

  # strptime-style layout of the %t field, e.g. "29/May/2006:10:00:00 +0100".
  CommonTimeFormat = '%d/%b/%Y:%H:%M:%S %z'

  module Log
    class << self
      # Splits +lines+ (converted with to_s) on newlines and parses every line
      # with +format+, returning the Array of results in file order.
      # A line the format cannot match makes format.parse raise.
      def parse(lines, format = CombinedLogFormat)
        lines.to_s.split(/\n/).collect do |line|
          format.parse(line)
        end
      end
    end
  end

end
@@ -0,0 +1,64 @@
1
module Apache
  module Log
    # One %-directive of an Apache LogFormat string (e.g. "%h", "%>s",
    # "%{Referer}i"): knows the attribute name it maps to, the regexp fragment
    # that captures its value, and how to convert the captured text.
    class Directive

      class << self
        # Registers the directive letter +char+ under attribute +name+, with an
        # optional regexp fragment (a String) used to capture its value.
        def dir(char, name, pattern = nil)
          directives[char] = [name, pattern]
        end

        # Registers a conversion for the attribute +directive+: either a method
        # name (Symbol) sent to the captured String, or a block called with
        # (captured_string, directive_instance).
        def convert(directive, conversion = nil, &converter)
          converters[directive] = conversion || converter
        end

        # Registry of letter => [name, pattern]. Public on purpose: instances
        # reach it through self.class.
        def directives
          @@directives ||= {}
        end

        # Registry of attribute name => conversion.
        def converters
          @@converters ||= {}
        end
      end

      attr_reader :name, :key

      # For keyed directives such as %{fmt}t the key doubles as a format string.
      alias format key

      # str - the directive text as it appears in the format string. The final
      #       character selects the registered directive; anything inside
      #       {...} becomes the key.
      #
      # An unregistered letter yields a directive whose name is nil. The
      # registry is read through the lazy accessor so that an empty registry
      # does not raise.
      def initialize(str)
        @name, @pattern = self.class.directives[str[-1, 1]]
        @key = $1 if /\{(.+)\}/ =~ str
      end

      # Regexp fragment (String) capturing this directive's value; defaults to
      # a non-greedy "one or more characters" group.
      def pattern
        @pattern ||= '(.+?)'
      end

      # True when the directive was written with a {key}.
      def keyed?
        !@key.nil?
      end

      # True for directives whose {key} is a format specifier rather than a
      # lookup key (time and process id).
      def formatted?
        @name == :time || @name == :process_id
      end

      # Converts captured text to its final value; Apache's "-" placeholder
      # becomes nil.
      def process(str)
        str == '-' ? nil : convert(str)
      end


      private

      # Applies the conversion registered for this directive's name, if any.
      def convert(str)
        converter = self.class.converters[name]
        return str unless converter

        converter.respond_to?(:call) ? converter.call(str, self) : str.send(converter)
      end

    end
  end
end
@@ -0,0 +1,45 @@
1
module Apache
  module Log
    class Directive

      # c.f. http://httpd.apache.org/docs/2.2/mod/mod_log_config.html
      #
      # Each row is [letter, attribute name] plus an optional capture pattern;
      # rows are registered in the order listed.
      directive_table = [
        ['a', :remote_ip],
        ['A', :local_ip],
        ['B', :bytes_sent],
        ['b', :bytes_sent], # CLF format, i.e. '-' instead of '0'
        ['O', :bytes_sent_including_headers],
        ['I', :bytes_received],
        ['C', :cookies],
        ['e', :env],
        ['f', :filename],
        ['h', :remote_host],
        ['m', :request_method],
        ['H', :request_protocol],
        ['i', :request_headers, '(.*?)'],
        ['l', :remote_logname],
        ['n', :notes],
        ['o', :reply_headers],
        ['p', :port],
        ['P', :process_id],
        ['q', :query_string, '(.*?)'],
        ['r', :request],
        ['s', :status],
        ['T', :time_taken],
        ['D', :time_taken], # microseconds
        ['u', :remote_user],
        ['U', :url_path],
        ['v', :canonical_server_name],
        ['V', :server_name],
        ['X', :connection_status],
        ['t', :time, '\[(.+?)\]']
      ]

      directive_table.each { |registration| dir(*registration) }

      # Attributes whose captured text is turned into an Integer with to_i.
      integer_attributes = [
        :bytes_sent,
        :bytes_sent_including_headers,
        :bytes_received,
        :status,
        :time_taken
      ]

      integer_attributes.each { |attribute| convert attribute, :to_i }

    end
  end
end
@@ -0,0 +1,45 @@
1
module Apache
  module Log
    # One parsed log line: a read-only view over a Hash of attributes.
    #
    # Attributes can be read with entry[:name] or, for keys present in the
    # hash, as methods (entry.name). Enumerable iterates (key, value) pairs.
    class Entry

      include Enumerable

      # attributes - anything responding to to_hash, keyed by attribute name.
      def initialize(attributes)
        @attributes = attributes.to_hash
      end

      # Returns the value stored under +key+, or nil when absent.
      def [](key)
        @attributes[key]
      end

      # Yields each (key, value) pair; returns an Enumerator without a block.
      def each
        return enum_for(:each) unless block_given?

        @attributes.each { |key, value| yield key, value }
      end

      # Deliberately terse: entries are numerous and their contents are long.
      def inspect
        "#<Apache::Log::Entry>"
      end

      # Multi-line "name: value" dump; Hash values are listed one
      # "key: value" per line beneath their "name:" heading.
      def to_s
        lines = []
        each do |directive, value|
          if value.is_a? Hash
            lines << "#{directive}:"
            value.each { |k, v| lines << " #{k}: #{v}" }
          else
            lines << "#{directive}: #{value}"
          end
        end
        lines.join("\n")
      end


      private

      # Exposes every attribute key as a reader method.
      def method_missing(name, *args, &block)
        return @attributes[name] if @attributes.has_key?(name)
        super
      end

      # Keeps respond_to? consistent with the readers method_missing provides.
      def respond_to_missing?(name, include_private = false)
        @attributes.has_key?(name) || super
      end

    end
  end
end
@@ -0,0 +1,50 @@
1
module Apache
  module Log
    # A log format written in Apache LogFormat syntax, compiled lazily into a
    # regexp that turns matching log lines into Entry objects.
    class Format

      # One directive: "%", optional modifiers (status conditions, "<", ">")
      # and/or a {key}, then the directive letter. The single capture group
      # makes String#split return the directives along with the literal text.
      DIRECTIVE_PATTERN = /(%(?:\{[^}]*\}|[^\sa-zA-Z{%])*[a-zA-Z])/

      def initialize(format_string)
        @format_string = format_string.to_s
      end

      # Returns the original format string.
      def to_s
        @format_string
      end

      # Parses one log line into an Entry.
      #
      # Keyed directives (e.g. %{Referer}i) are collected into a Hash under the
      # directive's name, keyed by the directive key; all others are stored
      # directly under their name.
      #
      # Raises RuntimeError ("failed to match: ...") when the line does not fit.
      def parse(str)
        matchdata = pattern.match(str) # compiling the pattern also fills #directives
        raise "failed to match: %p" % str unless matchdata

        attributes = {}
        directives.zip(matchdata.captures).each do |directive, capture|
          value = directive.process(capture)
          if directive.keyed? && !directive.formatted?
            (attributes[directive.name] ||= {})[directive.key] = value
          else
            attributes[directive.name] = value
          end
        end
        Entry.new(attributes)
      end


      private

      # ^ and $ (not \A and \z) are kept so a line carrying its trailing
      # newline still matches, as before.
      def pattern
        @pattern ||= /^#{compile(@format_string)}$/
      end

      # Translates a format string into regexp source and records its
      # directives in order.
      #
      # Literal text between directives is regexp-escaped: characters such as
      # "(" or "[" in a format must match themselves and must not introduce
      # capture groups that would shift values onto the wrong directive.
      def compile(str)
        @directives = []

        # Splitting on a pattern with one capture group alternates
        # literal, directive, literal, ... so odd indexes are directives.
        str.split(DIRECTIVE_PATTERN).each_with_index.map do |token, index|
          if index.odd?
            directives << Directive.new(token)
            directives.last.pattern
          else
            Regexp.escape(token)
          end
        end.join
      end

      def directives
        @directives ||= []
      end

    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'date'
2
+ require 'inline'
3
+
4
+
5
module Kernel

  # About 20 times faster than DateTime._strptime

  # strptime_c(string, format) -> Time
  #
  # Thin RubyInline wrapper around C strptime(3) + timegm(3): the broken-down
  # time is interpreted as UTC and any zone information is ignored.
  inline do |builder|
    builder.include '<string.h>'
    builder.include '<time.h>'
    builder.c_raw %{
      static VALUE strptime_c(int argc, VALUE *argv, VALUE self){
        struct tm t;
        char *string, *format;

        string = StringValuePtr(argv[0]);
        format = StringValuePtr(argv[1]);

        /* strptime only sets the fields named by the format; start from
           zeroes so the remaining fields are not stack garbage. */
        memset(&t, 0, sizeof(t));

        if(strptime(string, format, &t) == NULL)
          /* TODO: handle error */;

        t.tm_isdst = -1;

        return rb_time_new(timegm(&t), 0);
      }
    }
  end

  # Parses +str+ according to the strptime(3) format +fmt+ and returns a Time.
  #
  # When the format ends in " %z" and the string ends in something that looks
  # like a zone, the zone's offset from UTC is applied here, because
  # strptime_c treats the wall-clock fields as UTC.
  #
  # NOTE(review): relies on Date.zone_to_diff returning the offset in seconds,
  # positive east of UTC - confirm it is available on the Ruby version in use.
  def strptime(str, fmt)
    time = strptime_c(str, fmt)

    # Most C strptime implementations don't handle timezones.
    if fmt =~ / %z$/ && str =~ / ([-+:a-z0-9]+(?:\s+dst\b)?)$/io
      # "10:00:00 +0100" is 09:00:00 UTC: the offset has to be subtracted
      # from the as-if-UTC time to reach the real instant.
      time - Date.zone_to_diff($1)
    else
      time
    end
  end

end
41
+
42
module Apache
  module Log
    class Directive
      # Converts the captured %t text with the strptime helper, using the
      # directive's own {format} when one was given and CommonTimeFormat
      # otherwise.
      convert(:time) { |raw, directive|
        strptime(raw, directive.format || CommonTimeFormat)
      }
    end
  end
end
@@ -0,0 +1,46 @@
1
+ = Aplo
2
+
3
+ A Ruby wrapper for Apache log files.
4
+
5
+ == Basic Usage
6
+
7
+ log_contents = File.read('/path/to/log/file')
8
+
9
+ Apache::Log.parse(log_contents).each do |entry|
10
+ ...
11
+ entry.url_path
12
+ entry.bytes_sent
13
+ entry.remote_host
14
+ entry.request_headers['Referer']
15
+ entry.request_headers['User-agent']
16
+ ...
17
+ end
18
+
19
+ == Formats
20
+
21
+ The `parse` method takes a second (optional) argument that specifies the format
22
+ of the log. This defaults to `Apache::CombinedLogFormat`; `Apache::CommonLogFormat`,
23
+ `RefererLogFormat`, and `AgentLogFormat` are also defined. Other formats can be
24
+ created like so:
25
+
26
+ custom_format = Apache::Log::Format.new(format_string)
27
+
28
+ == Default Type-casting
29
+
30
+ Integers are converted using `to_i`, and `-` is converted to `nil`.
31
+
32
+ == Shortcuts
33
+
34
+ Adding methods to the `Entry` class is the easiest way to enhance the wrapper,
35
+ for example:
36
+
37
+ require 'ipaddr'
38
+
39
+ class Apache::Log::Entry
40
+ def ip
41
+ IPAddr.new(@attributes[:remote_host])
42
+ end
43
+ end
44
+
45
+ A C/strptime/RubyInline extension is also included to help convert times;
46
+ require 'aplo/strptime' to use it.
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: Aplo
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.1"
7
+ date: 2006-05-29 00:00:00 +01:00
8
+ summary: A Ruby wrapper for Apache log files.
9
+ require_paths:
10
+ - lib
11
+ email: twoggle@gmail.com
12
+ homepage: http://aplo.rubyforge.org/
13
+ rubyforge_project: aplo
14
+ description:
15
+ autorequire: aplo
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ authors:
29
+ - Tim Fletcher
30
+ files:
31
+ - lib/aplo.rb
32
+ - lib/aplo/directive.rb
33
+ - lib/aplo/directives.rb
34
+ - lib/aplo/entry.rb
35
+ - lib/aplo/format.rb
36
+ - lib/aplo/strptime.rb
37
+ - example/comb.rb
38
+ - example/comb/ascii_chart.rb
39
+ - example/comb/geoip.rb
40
+ - example/comb/itself.rb
41
+ - readme.txt
42
+ test_files: []
43
+
44
+ rdoc_options: []
45
+
46
+ extra_rdoc_files: []
47
+
48
+ executables: []
49
+
50
+ extensions: []
51
+
52
+ requirements: []
53
+
54
+ dependencies: []
55
+