Aplo 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
+ #
2
+ # Comb - an Apache log analysis script.
3
+ #
4
+ # By Tim Fletcher <twoggle@gmail.com>
5
+ #
6
+ # Licensed under a Creative Commons Attribution-ShareAlike 2.5 License
7
+ # http://creativecommons.org/licenses/by-sa/2.5/
8
+ #
9
+ # Requirements:
10
+ #
11
+ # GeoIP <http://www.maxmind.com/download/geoip/api/c/>
12
+ # Lazy.rb <http://moonbase.rydia.net/software/lazy.rb/>
13
+ #
14
+ # Usage:
15
+ #
16
+ # ruby comb.rb relative/glob/of/access_log*
17
+ #
18
+ #
19
+ __DIR__ = File.dirname(__FILE__)
20
+
21
+ $:.unshift File.join(__DIR__, '..', 'lib')
22
+
23
+ require 'rubygems' rescue nil
24
+ require 'aplo'
25
+ require 'lazy'
26
+ require 'comb/geoip'
27
+ require 'comb/ascii_chart'
28
+ require 'comb/itself'
29
+
30
# Convenience extensions to Numeric used by the Comb report.
class Numeric
  # Converts a byte count into megabytes (1 MB = 1,000,000 bytes).
  #
  # Always returns a Float: dividing by an Integer truncated the result
  # (1_500_000 bytes became 1), which defeated the "%.1f" formatting the
  # report applies to this value.
  def MB
    self / 1_000_000.0
  end

  # Rounds the receiver to the nearest multiple of +n+.
  #
  # A remainder of exactly half of +n+ rounds up.
  def round_to_nearest(n)
    remainder = self % n
    self - remainder + (remainder < (n / 2.0) ? 0 : n)
  end
end
38
+
39
# Returns the arithmetic mean of the values in +enum+.
#
# enum:: a collection responding to +inject+ and +size+.
#
# Returns 0.0 for an empty collection. Previously +inject+ returned nil in
# that case and the division raised NoMethodError. The division itself is
# unchanged, so a collection of Integers still yields an Integer.
def mean(enum)
  return 0.0 if enum.size == 0

  enum.inject { |sum, value| sum + value } / enum.size
end
42
+
43
# Run the report only when this file is executed directly, not when required.
if $0 == __FILE__
  # The first command-line argument is a glob of log files; fall back to the
  # default location when none is given.
  log_glob = ARGV[0] || '../logs/access_log*'

  Comb(
    :logs  => Dir[log_glob],
    :cache => File.join(__DIR__, 'comb', 'geoip.cache'),
    :top   => 10 # countries
  )
end
@@ -0,0 +1,46 @@
1
# Renders [label, value] pairs as a horizontal ASCII bar chart, one line per
# pair, with each value shown as a percentage of the :max option.
class AsciiPercentageChart

  # data::    enumerable of [label, value] pairs; labels are asked for
  #           +length+ and +rjust+, values for +to_f+.
  # options:: hash read through the private accessors below
  #           (:max, :granularity, :symbol).
  def initialize(data, options = {})
    @data, @options = data, options
  end

  # c.f. MenTaLguY's rjust_lines example [ruby-talk 164045]

  # Builds the chart text, one line per data pair, joined with newlines.
  #
  # NOTE(review): +promise+ is not defined in this file; presumably it comes
  # from lazy.rb and defers evaluation of its block - confirm.
  def to_s
    resulting = promise do
      @data.inject({ :output => [], :label_length => 0 }) do |line, (label, value)|

        # Track the longest label seen so far.
        line[:label_length] = label.length if label.length > line[:label_length]

        percentage = value.to_f * 100 / max

        # Number of bar symbols: the percentage rounded to the nearest
        # multiple of the granularity, expressed in granularity units.
        bar_length = percentage.round_to_nearest(granularity) / granularity

        # Separate the bar from the figures only when there is a bar.
        end_label = (bar_length == 0 ? '' : ' ') + ("%d, %.1f%%" % [ value, percentage ])

        # Each line is itself a promise that reads :label_length back out of
        # +resulting+, the enclosing promise. Presumably this is what lets
        # every label be padded to the final maximum width rather than the
        # running one - verify against lazy.rb semantics.
        line[:output] << promise {
          "#{label.rjust(resulting[:label_length] + 2)} : " + (symbol * bar_length) + end_label
        }

        next line
      end
    end
    return resulting[:output] * "\n"
  end


  private

  # The value that corresponds to 100%; no default is supplied here.
  def max
    @options[:max]
  end

  # Percentage points per bar symbol. Defaults to 2; the default is written
  # back into the options hash that was passed in.
  def granularity
    @options[:granularity] ||= 2
  end

  # String repeated to draw a bar. Defaults to '*'; the default is written
  # back into the options hash that was passed in.
  def symbol
    @options[:symbol] ||= '*'
  end

end
@@ -0,0 +1,40 @@
1
module GeoIP
  # Disk-backed cache of host => country-name lookups performed with the
  # external +geoiplookup+ command.
  class Cache

    # c.f. http://eigenclass.org/hiki.rb?Hash+default+block

    # Hosts come straight from log files and are interpolated into a shell
    # command, so only values that look like an IP address or hostname (and
    # cannot be mistaken for a command-line option) are ever passed on.
    SafeHost = /\A[0-9A-Za-z][\w.:-]*\z/

    # filename:: path of the Marshal file used to persist the cache. A
    #            missing or unreadable file simply yields an empty cache.
    def initialize(filename)
      @filename = filename
      update!
    end

    # Returns the cached country name for +ip+, looking it up (and caching
    # the answer, including nil for "unknown") on first access.
    def [](ip)
      data[ip]
    end

    # Writes the cache to disk.
    #
    # A plain copy is dumped because Marshal cannot serialise a Hash that has
    # a default block. Clearing the default on the live hash instead would
    # silently disable lookups for the rest of the object's life.
    def save
      File.open(@filename, 'wb') { |f| Marshal.dump({}.update(data), f) }
    end


    private

    # The backing Hash; missing keys trigger a lookup that is then stored.
    def data
      @data ||= Hash.new { |hash, ip| hash[ip] = lookup(ip) }
    end

    # Shells out to geoiplookup for +ip+. Returns nil without running any
    # command when +ip+ does not match SafeHost.
    def lookup(ip)
      host = ip.to_s
      return nil unless host =~ SafeHost

      convert `geoiplookup #{host}`
    end

    # Extracts the country name from geoiplookup output, or nil when the
    # output reports "--, N/A".
    def convert(str)
      return nil if str =~ /--, N\/A/

      str.sub(/^GeoIP Country Edition: /, '').sub(/[A-Z]+,/, '').strip
    end

    # Merges previously saved entries into the cache (best effort).
    #
    # NOTE: Marshal.load must only ever be pointed at a cache file this
    # program wrote itself; it is unsafe on untrusted data.
    def update!
      data.update(File.open(@filename, 'rb') { |f| Marshal.load(f) })
    rescue StandardError
      # No cache file yet, or it is unreadable/corrupt: start empty.
      {}
    end

  end
end
@@ -0,0 +1,37 @@
1
# Parses a set of Apache access logs and prints a summary report: files
# processed, parse timings, total hits, unique hosts and a chart of the
# countries with the most hits.
#
# options:: hash with
#           :logs  - array of log file paths,
#           :cache - path handed to GeoIP::Cache,
#           :top   - number of countries to chart.
def Comb(options)
  logs  = options[:logs]
  cache = GeoIP::Cache.new(options[:cache])

  seen_hosts   = {}
  hits         = 0
  country_hits = Hash.new { |counts, country| counts[country] = 0 }
  parse_times  = []
  started_at   = Time.now

  logs.each do |filename|
    parse_started = Time.now
    entries = Apache::Log.parse(File.read(filename))
    parse_times << (Time.now - parse_started)

    entries.each do |entry|
      hits += 1
      seen_hosts[entry.remote_host] = true
      country_hits[cache[entry.remote_host]] += 1
    end
  end

  time_taken = Time.now - started_at

  # Hosts with no known country are counted under nil; leave them out.
  country_hits.delete_if { |label, _| label.nil? }
  ranked        = country_hits.sort_by { |_, n| n }.reverse
  top_countries = ranked[0, options[:top]]

  throughput = logs.inject(0) { |total, file| total + File.size(file) }

  cache.save

  puts
  puts "> Processed #{logs.size} log files (%.1f Mb) in %.1f seconds" % [ throughput.MB, time_taken ]
  puts "> Average log parse time of %.3f seconds" % mean(parse_times)
  puts "> Total hits: %d" % hits
  puts "> Unique IPs: %d" % seen_hosts.size
  puts "> Top %d countries..." % options[:top]
  puts
  puts AsciiPercentageChart.new(top_countries, :max => hits, :symbol => '|')
  puts
end
@@ -0,0 +1,45 @@
1
+ # Aplo - A Ruby wrapper for Apache log files.
2
+ #
3
+ # Copyright (c) 2006 Tim Fletcher <twoggle@gmail.com>
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9
+ # of the Software, and to permit persons to whom the Software is furnished to
10
+ # do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
18
+ # OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19
+ # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
20
+ # IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ require 'aplo/directive'
23
+ require 'aplo/directives'
24
+ require 'aplo/entry'
25
+ require 'aplo/format'
26
+
27
module Apache

  # Ready-made formats matching the usual Apache LogFormat definitions.
  CommonLogFormat   = Log::Format.new('%h %l %u %t "%r" %>s %b')

  CombinedLogFormat = Log::Format.new('%h %l %u %t "%r" %>s %b "%{Referer}i" "%{User-agent}i"')

  RefererLogFormat  = Log::Format.new('%{Referer}i -> %U')

  AgentLogFormat    = Log::Format.new('%{User-agent}i')

  # strptime-style layout of the default %t timestamp.
  CommonTimeFormat  = '%d/%b/%Y:%H:%M:%S %z'

  module Log
    class << self
      # Parses log text into an array with one parsed result per line.
      #
      # lines::  the log contents; converted with +to_s+ and split on "\n".
      # format:: the format object whose +parse+ is applied to every line
      #          (defaults to CombinedLogFormat).
      def parse(lines, format = CombinedLogFormat)
        entries = []
        lines.to_s.split(/\n/).each { |line| entries << format.parse(line) }
        entries
      end
    end
  end

end
@@ -0,0 +1,64 @@
1
module Apache
  module Log
    # A single directive taken from a log format string, e.g. "%h", "%>s"
    # or "%{Referer}i". The final character selects the registered
    # name/pattern; an optional "{...}" part becomes the key.
    class Directive

      class << self
        # Registers the directive letter +char+ under +name+, optionally
        # with the regexp source used to capture its value.
        def dir(char, name, pattern = nil)
          directives[char] = [name, pattern]
        end

        # Registers how values of the directive called +directive+ are
        # converted: either a method name sent to the string, or a block /
        # callable invoked with (string, directive).
        def convert(directive, conversion = nil, &converter)
          converters[directive] = conversion || converter
        end

        # Registry of letter => [name, pattern], created on first use.
        def directives
          @@directives ||= {}
        end

        # Registry of name => conversion, created on first use.
        def converters
          @@converters ||= {}
        end
      end

      attr_reader :name, :key

      # For formatted directives the "{...}" part is a format, not a key.
      alias format key

      # str:: the directive exactly as written in the format string.
      #
      # The registry is reached through the accessor, so building a directive
      # before anything has been registered yields a nameless directive
      # instead of raising NameError on an uninitialised class variable.
      def initialize(str)
        @name, @pattern = self.class.directives[str[-1, 1]]
        @key = str[/\{(.+)\}/, 1]
      end

      # Regexp source (with one capture group) matching this directive's
      # value; a non-greedy catch-all unless one was registered.
      def pattern
        @pattern ||= '(.+?)'
      end

      # True when the directive carries a "{...}" part.
      def keyed?
        !@key.nil?
      end

      # True for the directives whose "{...}" part is a format.
      def formatted?
        @name == :time || @name == :process_id
      end

      # Converts a captured string into its final value; "-" becomes nil.
      def process(str)
        str == '-' ? nil : convert(str)
      end


      private

      # Applies the registered conversion for this directive's name, or
      # returns +str+ untouched when there is none.
      def convert(str)
        converter = self.class.converters[name]
        return str unless converter

        converter.respond_to?(:call) ? converter.call(str, self) : str.send(converter)
      end

    end
  end
end
@@ -0,0 +1,45 @@
1
module Apache
  module Log
    class Directive

      # c.f. http://httpd.apache.org/docs/2.2/mod/mod_log_config.html

      # Directive letter, attribute name and (optionally) the regexp source
      # used to capture the value.
      [
        ['a', :remote_ip],
        ['A', :local_ip],
        ['B', :bytes_sent],
        ['b', :bytes_sent], # CLF format, i.e. '-' instead of '0'
        ['O', :bytes_sent_including_headers],
        ['I', :bytes_received],
        ['C', :cookies],
        ['e', :env],
        ['f', :filename],
        ['h', :remote_host],
        ['m', :request_method],
        ['H', :request_protocol],
        ['i', :request_headers, '(.*?)'],
        ['l', :remote_logname],
        ['n', :notes],
        ['o', :reply_headers],
        ['p', :port],
        ['P', :process_id],
        ['q', :query_string, '(.*?)'],
        ['r', :request],
        ['s', :status],
        ['T', :time_taken],
        ['D', :time_taken], # microseconds
        ['u', :remote_user],
        ['U', :url_path],
        ['v', :canonical_server_name],
        ['V', :server_name],
        ['X', :connection_status],
        ['t', :time, '\[(.+?)\]']
      ].each { |registration| dir(*registration) }

      # Attributes whose captured text is turned into an Integer.
      [
        :bytes_sent,
        :bytes_sent_including_headers,
        :bytes_received,
        :status,
        :time_taken
      ].each { |attribute| convert attribute, :to_i }

    end
  end
end
@@ -0,0 +1,45 @@
1
module Apache
  module Log
    # One parsed log line: a read-only view over a hash of attributes that
    # can be read with [] or as methods (entry.remote_host).
    class Entry

      include Enumerable

      # attributes:: anything responding to +to_hash+, keyed by attribute name.
      def initialize(attributes)
        @attributes = attributes.to_hash
      end

      # Returns the attribute stored under +key+, or nil.
      def [](key)
        @attributes[key]
      end

      # Yields each attribute name and value in turn.
      def each
        @attributes.each { |key, value| yield key, value }
      end

      # Deliberately terse so that arrays of entries stay readable.
      def inspect
        "#<Apache::Log::Entry>"
      end

      # One "name: value" line per attribute; hash-valued attributes are
      # expanded into an indented line per key.
      def to_s
        lines = []
        each do |directive, value|
          if value.is_a? Hash
            lines << "#{directive}:"
            value.each { |k, v| lines << " #{k}: #{v}" }
          else
            lines << "#{directive}: #{value}"
          end
        end
        lines.join("\n")
      end


      private

      # Exposes every attribute as a reader method.
      def method_missing(name, *args, &block)
        return @attributes[name] if @attributes.has_key?(name)
        super
      end

      # Keeps respond_to? (and Object#method) truthful about the readers
      # that method_missing provides.
      def respond_to_missing?(name, include_private = false)
        @attributes.has_key?(name) || super
      end

    end
  end
end
@@ -0,0 +1,50 @@
1
module Apache
  module Log
    # A compiled log format string: turns matching log lines into Entry
    # objects.
    class Format

      # One directive in a format string: "%", optional modifiers (status
      # lists, "<", ">", "!"), an optional "{...}" argument and the directive
      # letter. Matching stops at the letter, so adjacent directives such as
      # "%h:%p" are recognised separately even without whitespace between
      # them.
      DirectiveToken = /%[^%\s{}a-zA-Z]*(?:\{[^}]*\})?[a-zA-Z]/

      # format_string:: the format specification (converted with +to_s+).
      def initialize(format_string)
        @format_string = format_string.to_s
      end

      # Returns the original format string.
      def to_s
        @format_string
      end

      # Parses one log line.
      #
      # Returns an Entry built from the captured values. Keyed directives
      # that are not "formatted" are grouped into a hash under the directive
      # name, keyed by their "{...}" part; everything else is stored directly
      # under the directive name.
      #
      # Raises RuntimeError when +str+ does not match the format.
      def parse(str)
        # +pattern+ must be evaluated first: compiling it fills +directives+.
        matchdata = pattern.match(str)
        raise "failed to match: %p" % [str] unless matchdata

        attributes = {}
        directives.zip(matchdata.captures).each do |directive, capture|
          value = directive.process(capture)
          if directive.keyed? && !directive.formatted?
            (attributes[directive.name] ||= {})[directive.key] = value
          else
            attributes[directive.name] = value
          end
        end
        Entry.new(attributes)
      end


      private

      # The regexp for a whole line, compiled once.
      def pattern
        @pattern ||= /^#{compile(@format_string)}$/
      end

      # Builds regexp source from the format string: each directive is
      # replaced by its capture pattern (and recorded in +directives+), while
      # the literal text in between is escaped so that characters such as
      # "(", "[" or "." match themselves and cannot add capture groups.
      def compile(str)
        parts, rest = [], str
        while (token = DirectiveToken.match(rest))
          parts << Regexp.escape(token.pre_match)
          directives << Directive.new(token[0])
          parts << directives.last.pattern
          rest = token.post_match
        end
        parts << Regexp.escape(rest)
        parts.join
      end

      # Directives in the order they appear in the format string.
      def directives
        @directives ||= []
      end

    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'date'
2
+ require 'inline'
3
+
4
+
5
# Adds a C-backed strptime to every object via Kernel.
module Kernel

  # About 20 times faster than DateTime._strptime

  # Defines strptime_c(string, format) from raw C through the +inline+
  # builder (presumably RubyInline - confirm). The C function parses
  # +string+ with the C library's strptime, sets tm_isdst to -1 and returns
  # rb_time_new(timegm(&t), 0).
  #
  # NOTE(review): a failed parse is not handled (see the TODO in the C
  # source), and +t+ is not zeroed beforehand, so the result after a failed
  # or partial parse looks undefined - confirm and add error handling.
  inline do |builder|
    builder.include '<time.h>'
    builder.c_raw %{
      static VALUE strptime_c(int argc, VALUE *argv, VALUE self){
        struct tm t;
        char *string, *format;

        string = StringValuePtr(argv[0]);
        format = StringValuePtr(argv[1]);

        if(strptime(string, format, &t) == NULL)
          /* TODO: handle error */;

        t.tm_isdst = -1;

        return rb_time_new(timegm(&t), 0);
      }
    }
  end

  # Parses +str+ according to the strptime-style format +fmt+.
  #
  # When +fmt+ ends in " %z" and +str+ ends in something that looks like a
  # zone, the value of Date.zone_to_diff for that zone is added to the
  # parsed time; otherwise the time from strptime_c is returned as is.
  #
  # NOTE(review): the fields are interpreted via timegm (i.e. as UTC) and
  # the zone offset is then *added*; converting a local timestamp to UTC
  # would normally subtract it - verify the intended sign.
  def strptime(str, fmt)
    time = strptime_c(str, fmt)

    # Most C strptime implementations don't handle timezones.
    if fmt =~ / %z$/ && str =~ / ([-+:a-z0-9]+(?:\s+dst\b)?)$/io
      time + Date.zone_to_diff($1)
    else
      time
    end
  end

end
41
+
42
module Apache
  module Log
    class Directive
      # Convert :time values with strptime, using the directive's own
      # "{...}" format when it has one and CommonTimeFormat otherwise.
      convert :time do |raw, directive|
        strptime(raw, directive.format || CommonTimeFormat)
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ = Aplo
2
+
3
+ A Ruby wrapper for Apache log files.
4
+
5
+ == Basic Usage
6
+
7
+ log_contents = File.read('/path/to/log/file')
8
+
9
+ Apache::Log.parse(log_contents).each do |entry|
10
+ ...
11
+ entry.url_path
12
+ entry.bytes_sent
13
+ entry.remote_host
14
+ entry.request_headers['Referer']
15
+ entry.request_headers['User-agent']
16
+ ...
17
+ end
18
+
19
+ == Formats
20
+
21
+ The `parse` method takes a second (optional) argument that specifies the format
22
+ of the log. This defaults to `Apache::CombinedLogFormat`; `Apache::CommonLogFormat`,
23
+ `RefererLogFormat`, and `AgentLogFormat` are also defined. Other formats can be
24
+ created like so:
25
+
26
+ custom_format = Apache::Log::Format.new(format_string)
27
+
28
+ == Default Type-casting
29
+
30
+ Numeric fields (bytes sent/received, status and time taken) are converted to integers using `to_i`, and a value of `-` is converted to `nil`.
31
+
32
+ == Shortcuts
33
+
34
+ Adding methods to the `Entry` class is the easiest way to enhance the wrapper,
35
+ for example:
36
+
37
+ require 'ipaddr'
38
+
39
+ class Apache::Log::Entry
40
+ def ip
41
+ IPAddr.new(@attributes[:remote_host])
42
+ end
43
+ end
44
+
45
+ A C/strptime/RubyInline extension is also included to help convert times;
46
+ require 'aplo/strptime' to use it.
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: Aplo
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.1"
7
+ date: 2006-05-29 00:00:00 +01:00
8
+ summary: A Ruby wrapper for Apache log files.
9
+ require_paths:
10
+ - lib
11
+ email: twoggle@gmail.com
12
+ homepage: http://aplo.rubyforge.org/
13
+ rubyforge_project: aplo
14
+ description:
15
+ autorequire: aplo
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ authors:
29
+ - Tim Fletcher
30
+ files:
31
+ - lib/aplo.rb
32
+ - lib/aplo/directive.rb
33
+ - lib/aplo/directives.rb
34
+ - lib/aplo/entry.rb
35
+ - lib/aplo/format.rb
36
+ - lib/aplo/strptime.rb
37
+ - example/comb.rb
38
+ - example/comb/ascii_chart.rb
39
+ - example/comb/geoip.rb
40
+ - example/comb/itself.rb
41
+ - readme.txt
42
+ test_files: []
43
+
44
+ rdoc_options: []
45
+
46
+ extra_rdoc_files: []
47
+
48
+ executables: []
49
+
50
+ extensions: []
51
+
52
+ requirements: []
53
+
54
+ dependencies: []
55
+