rubiojr-apalo 0.0.101
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +18 -0
- data/README.txt +63 -0
- data/Rakefile +21 -0
- data/THANKS +8 -0
- data/bin/atk +23 -0
- data/lib/apalo/cli/commands/basic.rb +88 -0
- data/lib/apalo/cli/commands/hit_counter.rb +67 -0
- data/lib/apalo/cli/commands/monthly_stats.rb +28 -0
- data/lib/apalo/cli/commands/user_agents.rb +17 -0
- data/lib/apalo/cli/commands/vhost_stats.rb +26 -0
- data/lib/apalo/cli/commands/vhosts_overview.rb +58 -0
- data/lib/apalo/cli.rb +53 -0
- data/lib/apalo/core/log_line.rb +9 -0
- data/lib/apalo/core/log_parser.rb +71 -0
- data/lib/apalo/core/log_string.rb +53 -0
- data/lib/apalo/core.rb +3 -0
- data/lib/apalo.rb +26 -0
- metadata +108 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
THANKS
|
6
|
+
bin/atk
|
7
|
+
lib/apalo.rb
|
8
|
+
lib/apalo/cli.rb
|
9
|
+
lib/apalo/cli/commands/basic.rb
|
10
|
+
lib/apalo/cli/commands/hit_counter.rb
|
11
|
+
lib/apalo/cli/commands/monthly_stats.rb
|
12
|
+
lib/apalo/cli/commands/user_agents.rb
|
13
|
+
lib/apalo/cli/commands/vhost_stats.rb
|
14
|
+
lib/apalo/cli/commands/vhosts_overview.rb
|
15
|
+
lib/apalo/core.rb
|
16
|
+
lib/apalo/core/log_line.rb
|
17
|
+
lib/apalo/core/log_parser.rb
|
18
|
+
lib/apalo/core/log_string.rb
|
data/README.txt
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
= Apalo Toolkit
|
2
|
+
|
3
|
+
* http://apalo.netcorex.org (Project page)
|
4
|
+
* http://github.com/rubiojr/apalo (Code)
|
5
|
+
* http://rubiojr.lighthouseapp.com/projects/16913-apalo/overview (Bug tracker)
|
6
|
+
|
7
|
+
== DESCRIPTION:
|
8
|
+
|
9
|
+
Apache Logs Toolkit
|
10
|
+
|
11
|
+
== FEATURES
|
12
|
+
|
13
|
+
* Analyzes apache log files (combined format and combined+vhost format)
|
14
|
+
presenting useful reports
|
15
|
+
* Modular, easily extended
|
16
|
+
* Multiple output formats (stdout, pdf, html, ...) [TODO]
|
17
|
+
* Multiple analyzers included (basic, ...) [TODO, only one ATM]
|
18
|
+
|
19
|
+
== PROBLEMS
|
20
|
+
|
21
|
+
* Early, buggy, mostly untested code
|
22
|
+
* Some of the code is pretty lame and slow (especially the analyzers)
|
23
|
+
|
24
|
+
== SYNOPSIS:
|
25
|
+
|
26
|
+
* Library and tools to analyze Apache log files
|
27
|
+
|
28
|
+
== REQUIREMENTS:
|
29
|
+
|
30
|
+
* Installing from gem should pull in all the requirements
|
31
|
+
* Oniguruma Regexp Library is optional but highly recommended.
|
32
|
+
It's waaaay faster than the current regexp engine in ruby1.8.
|
33
|
+
Apalo performance is much better if oniguruma is present.
|
34
|
+
|
35
|
+
* 2 log formats supported ATM:
|
36
|
+
* LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
|
37
|
+
* LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" [%V]" combinedv
|
38
|
+
|
39
|
+
|
40
|
+
== LICENSE:
|
41
|
+
|
42
|
+
(The MIT License)
|
43
|
+
|
44
|
+
Copyright (c) 2008 Sergio Rubio <sergio@rubio.name>
|
45
|
+
|
46
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
47
|
+
a copy of this software and associated documentation files (the
|
48
|
+
'Software'), to deal in the Software without restriction, including
|
49
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
50
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
51
|
+
permit persons to whom the Software is furnished to do so, subject to
|
52
|
+
the following conditions:
|
53
|
+
|
54
|
+
The above copyright notice and this permission notice shall be
|
55
|
+
included in all copies or substantial portions of the Software.
|
56
|
+
|
57
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
58
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
59
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
60
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
61
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
62
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
63
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rake'
|
2
|
+
$:.unshift(File.dirname(__FILE__) + "/lib")
|
3
|
+
require 'apalo'
|
4
|
+
require 'hoe'
|
5
|
+
|
6
|
+
# Gem packaging definition (Hoe).
#
# The author name and e-mail are registered exactly once, through
# +developer+. Previously +author+ and +email+ were assigned as well, and the
# generated gem metadata ended up with doubled values
# ("Sergio RubioSergio Rubio", "sergio@rubio.namesergio@rubio.name") --
# presumably because +developer+ appends to whatever is already stored there.
Hoe.new('Apalo', Apalo::VERSION) do |p|
  p.name = "apalo"
  p.developer('Sergio Rubio', 'sergio@rubio.name')
  p.description = %q{Library and utilities to analyse Apache logs}
  p.summary = "Apache Logs Toolkit"
  p.url = "http://github.com/rubiojr/apalo"
  #p.clean_globs = ['test/output/*.png']
  #p.changes = p.paragraphs_of('CHANGELOG', 0..1).join("\n\n")
  p.remote_rdoc_dir = '' # Release to root

  # Runtime dependencies pulled in when the gem is installed.
  p.extra_deps << ["ptools", ">= 1.1.6"]
  p.extra_deps << ["term-ansicolor", ">= 1.0"]
  p.extra_deps << ["cmdparse", ">= 0.6.5"]
end
|
21
|
+
|
data/THANKS
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
The person behind http://www.the-art-of-web.com/system/logs/. That article started it all.
|
2
|
+
|
3
|
+
To Andreas Staeding, the person behind http://www.user-agents.org/
|
4
|
+
|
5
|
+
To all the people behind Ruby, the language we all love.
|
6
|
+
|
7
|
+
|
8
|
+
To the people behind GitHub (http://www.github.com) and Git. They give us great tools to manage our code.
|
data/bin/atk
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
##!/opt/ruby1.9/bin/ruby
|
3
|
+
require 'rubygems'
|
4
|
+
require 'cmdparse'
|
5
|
+
require 'term/ansicolor'
|
6
|
+
require 'logger'
|
7
|
+
require 'apalo'
|
8
|
+
# Expose the Apalo::Core classes at the top level.
include Apalo::Core

# Mix the ANSI colour helpers into String so plugins can write "text".bold.
String.send(:include, Term::ANSIColor)

# Build the command parser, run the requested command and time it.
Apalo::Cli.init(Apalo::VERSION)
started_at = Time.now
Apalo::Cli.run
finished_at = Time.now
log_bytes = File.size?(Apalo.logfile)

# Run summary.
puts
puts "#{'Time taken: '.ljust(20)}#{finished_at - started_at} seconds"
puts "#{'Analyzed log size: '.ljust(20)}#{log_bytes} bytes"
puts "#{'Lines processed: '.ljust(20)}#{Apalo.parser.processed_lines}"
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# Returns the (at most) ten entries of +hash+ with the highest values.
#
# hash - Hash mapping any key to a numeric counter.
#
# Returns an Array of [key, value] pairs sorted by ascending value, so the
# biggest counter comes last. Entries with equal values are ordered by the
# string form of their key to keep the result deterministic.
#
# The previous implementation selected every entry whose value appeared among
# the ten largest values, so ties could make a "top 10" arbitrarily long
# (e.g. thousands of IPs with a single hit each).
def find_top10(hash)
  hash.sort_by { |key, val| [val, key.to_s] }.last(10)
end
|
10
|
+
|
11
|
+
|
12
|
+
# 'basic' command: walks the log once and prints top-10 tables for user
# agents, response codes, requested files, client IP addresses and the
# busiest hours.
Apalo::Cli.plugin 'basic' do
  # All counters default to 0 so they can be incremented without nil checks.
  user_agents     = Hash.new(0)
  response_codes  = Hash.new(0)
  hits_per_ip     = Hash.new(0)
  requested_files = Hash.new(0)
  hits_per_hour   = Hash.new(0)

  Apalo.parser.each_line do |line|
    next if line.nil?

    user_agents[line.user_agent] += 1
    response_codes[line.rcode] += 1
    # The second whitespace-separated token of the request is counted as the
    # requested file (e.g. the path in "GET /path HTTP/1.1").
    requested_files[line.request.split[1]] += 1
    hits_per_ip[line.ipaddr] += 1

    # The time field is split at its first ':' into a "day/month/year" part
    # and a clock part; hits are then bucketed per hour.
    i = line.time.index(':')
    day, month, year = line.time[0..i-1].split("/")
    t = line.time[i+1..-1].split[0]
    time = Time.parse("#{year}-#{Time.parse(month).month}-#{day} #{t}").strftime('%Y-%m-%d %H (%A)')
    hits_per_hour[time] += 1
  end

  # Prints one "count: key" row per entry returned by find_top10.
  print_top10 = lambda do |table|
    find_top10(table).each do |key, val|
      puts "#{val}: ".ljust(10, " ") + "#{key}"
    end
  end

  print_top10.call(user_agents)

  # These two tables used to read params[:response_codes] and
  # params[:requested_files]; no +params+ exists in this block, so the
  # command raised NameError instead of printing them.
  puts "\n** TOP 10 Response Codes **".bold
  print_top10.call(response_codes)

  puts "\n** TOP 10 Requested File".bold
  print_top10.call(requested_files)

  puts "\n** TOP 10 IP Addresses".bold
  print_top10.call(hits_per_ip)

  puts "\n** TOP 10 Hours (Busiest)".bold
  print_top10.call(hits_per_hour)
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# 'hit_counter' command: prints the log file size, the first and last dates
# seen, and request totals broken down by HTTP method, images, bots and
# "visits" (requests that are neither bot traffic nor images).
desc = 'reports hits, type of requests, bots request...'
Apalo::Cli.plugin('hit_counter', desc) do |args|
  size = File.size(Apalo.logfile)
  sizes = %w[KB MB GB]
  choosen_size = 0
  hits = 0
  posts = 0
  gets = 0
  options = 0
  heads = 0
  propfinds = 0
  # Named put_requests (not +puts+) so the counter no longer shadows
  # Kernel#puts inside this block.
  put_requests = 0
  locks = 0
  bot_hits = 0
  visits = 0
  images = 0
  start_date = nil
  end_date = nil

  # Scale the size down until it fits the largest sensible unit; stop at the
  # last known unit so the label lookup can never return nil.
  while size/1024.0 > 1000 && choosen_size < sizes.size - 1
    choosen_size += 1
    size = size/1024
  end

  puts "Log file size: #{sprintf('%-.2f', size/1024.0)} #{sizes[choosen_size]}"

  Apalo.parser.each_line do |line|
    # Date part of the time field: first token, up to the first ':'.
    day = line.time.split[0].split(':')[0]
    start_date ||= day
    end_date = day
    hits += 1

    request = line.request
    # Requests with any other method are only counted in the total.
    if request.is_post?
      posts += 1
    elsif request.is_get?
      gets += 1
    elsif request.is_options?
      options += 1
    elsif request.is_head?
      heads += 1
    elsif request.is_propfind?
      propfinds += 1
    elsif request.is_put?
      put_requests += 1
    elsif request.is_lock?
      locks += 1
    end

    from_bot = line.user_agent.is_a_bot?
    is_image = request.image?
    bot_hits += 1 if from_bot
    images += 1 if is_image
    # Bot detection must look at the user agent; the request line was tested
    # here before, so bot traffic was never excluded from the visit count.
    visits += 1 unless from_bot || is_image
  end

  # to_s keeps the report working when no log line was parsed (dates are nil).
  puts "First log date: ".ljust(30) + start_date.to_s
  puts "Last log date: ".ljust(30) + end_date.to_s
  puts "Total request: ".ljust(30) + hits.to_s
  puts "Visits received: ".ljust(30) + visits.to_s
  puts "Total POST request: ".ljust(30) + posts.to_s
  puts "Total GET requests: ".ljust(30) + gets.to_s
  puts "Total OPTION requests: ".ljust(30) + options.to_s
  puts "Total HEAD requests: ".ljust(30) + heads.to_s
  puts "Total PROPFIND requests: ".ljust(30) + propfinds.to_s
  puts "Total PUT requests: ".ljust(30) + put_requests.to_s
  puts "Total LOCK requests: ".ljust(30) + locks.to_s
  puts "Images served: ".ljust(30) + images.to_s
  puts "Bots Requests: ".ljust(30) + bot_hits.to_s
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# 'monthly_stats' command. The per-month hit counting is disabled, so for now
# the command only iterates over the whole log and prints how long that took.
Apalo::Cli.plugin('monthly_stats', 'Stats for the current month') do
  # Disabled logic, kept for reference:
  #   year, month = Time.now.year, Time.now.month
  #   months = { 1 => 'Jan', 2 => 'Feb', 3 => 'Mar', 4 => 'Apr',
  #              5 => 'May', 6 => 'Jun', 7 => 'Jul', 8 => 'Aug',
  #              9 => 'Sep', 10 => 'Oct', 11 => 'Nov', 12 => 'Dec' }
  #   hits = 0
  #   ...and inside the loop:
  #   hits += 1 if !line.nil? && line.time =~ /#{months[month]}\/#{year}/
  started = Time.now
  Apalo.parser.each_line { |line| }
  finished = Time.now
  puts "#{finished - started} secs"
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# 'user_agents' command: lists every distinct User-Agent found in the log, in
# order of first appearance, followed by the number of distinct agents.
#
# args - optional list of virtual host names; when given, only lines whose
#        vhost is in the list are considered.
Apalo::Cli.plugin 'user_agents' do |args|
  agents = []
  # Hash used for membership tests so de-duplication does not rescan the
  # whole +agents+ array for every log line.
  seen = {}

  Apalo.parser.each_line do |line|
    next if line.nil?
    next unless args.empty? || args.include?(line.vhost)

    agent = line.user_agent
    next if seen.key?(agent)

    seen[agent] = true
    agents << agent
  end

  puts agents
  puts
  puts "Unique User-Agents found: #{agents.size}"
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# 'vhost_stats' command: prints hit, image, bot, visit and byte totals for a
# single virtual host.
#
# args - args[0] is the virtual host name to report on.
#
# Exits with status 1 when the host name is missing or malformed.
Apalo::Cli.plugin 'vhost_stats' do |args|
  # to_s turns a missing argument into "" so it is rejected by the check
  # below instead of raising NoMethodError on nil.
  vhost = args[0].to_s.strip
  # \A/\z anchor the whole string (^/$ only anchor a line); hyphens are
  # allowed because they are valid in host names.
  if vhost !~ /\A([a-z0-9A-Z-]+\.)+[a-z0-9A-Z-]+\z/
    puts "invalid hostname #{vhost}"
    exit 1
  end

  hits = 0
  image_hits = 0
  bot_hits = 0
  bytes_transferred = 0

  Apalo.parser.each_line do |line|
    next if line.nil? || line.vhost != vhost

    hits += 1
    bot_hits += 1 if line.user_agent.is_a_bot?
    image_hits += 1 if line.request.image?
    # A size of '-' means no byte count was logged for the response.
    bytes_transferred += line.rsize.to_i if line.rsize != '-'
  end

  puts "Virtual Host: ".ljust(40) + vhost
  puts "Total Hits: ".ljust(40) + hits.to_s
  puts "Images Served: ".ljust(40) + image_hits.to_s
  puts "Bot Hits: ".ljust(40) + bot_hits.to_s
  puts "Visits: ".ljust(40) + (hits - image_hits - bot_hits).to_s
  puts "Bytes Transfered: ".ljust(40) + bytes_transferred.to_s
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Prints +title+, a blank line, then the (at most) ten entries of +hash+ with
# the highest values as "value: key" rows in ascending order, then a blank
# line. Entries whose value is 0 are not printed.
#
# hash  - Hash mapping a label to a numeric counter.
# title - String heading printed before the table.
#
# The ten rows are taken after sorting, so entries that tie with a top value
# can no longer push the table beyond ten rows. Ties are ordered by the
# string form of the key to keep the output deterministic.
def render_top10(hash, title)
  puts title
  puts
  hash.sort_by { |key, val| [val, key.to_s] }.last(10).each do |key, val|
    puts "#{val}: ".ljust(10, " ") + "#{key}" if val != 0
  end
  puts
end
|
14
|
+
|
15
|
+
# 'vhosts_overview' command: prints top-10 tables per virtual host for total
# hits, server errors (500-505), unauthorized (401) and forbidden (403)
# responses.
Apalo::Cli.plugin 'vhosts_overview' do
  # vhost => counters; the :hits key holds the total and each integer
  # response code holds the number of responses with that code.
  vtable = {}

  Apalo.parser.each_line do |line|
    next if line.nil?
    vhost = line.vhost
    # Skip lines without a vhost. This used to be +return+, which does not
    # simply move on to the next line when used inside a block.
    next if vhost.nil?

    stats = (vtable[vhost] ||= Hash.new(0))
    stats[:hits] += 1
    # The response code is counted for every request; previously the first
    # request seen for a vhost only initialised :hits and its code was lost.
    stats[line.rcode.to_i] += 1
  end

  hits = {}
  vtable.each { |key, val| hits[key] = val[:hits] }
  render_top10 hits, "*** Top10 Hits per Virtual Host ***"

  serrors = {}
  vtable.each do |key, val|
    serrors[key] = 0
    [500, 501, 502, 503, 504, 505].each do |c|
      serrors[key] += val[c] || 0
    end
  end
  render_top10 serrors, "*** Top10 Virtual Host by Server Errors (5xx) ***"

  unauth = {}
  vtable.each { |key, val| unauth[key] = val[401] || 0 }
  render_top10 unauth, "*** Top10 Unauthorized Requests by VHost (401) ***"

  forb = {}
  vtable.each { |key, val| forb[key] = val[403] || 0 }
  render_top10 forb, "*** Top10 Forbidden Requests by VHost (403) ***"
end
|
data/lib/apalo/cli.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
module Apalo
  # Command-line front end: builds the cmdparse command parser, loads the
  # built-in and user-supplied command plugins, and dispatches to them.
  module Cli
    # Creates the command parser, registers the global options and the
    # help/version commands, then loads every command file.
    #
    # version - dotted version String, e.g. "0.0.101".
    def self.init(version)
      @cmd = CmdParse::CommandParser.new( true, true )
      # NOTE(review): the program name is "knocked" although the executable
      # shipped with this gem is bin/atk -- confirm which name is intended.
      @cmd.program_name = "knocked"
      @cmd.program_version = version.split('.')
      # Both global options live in ONE wrapper. They used to be defined in
      # two wrappers assigned to @cmd.options one after the other, so the
      # second assignment replaced the first and --verbose was lost.
      @cmd.options = CmdParse::OptionParserWrapper.new do |opt|
        opt.separator "Global options:"
        opt.on("--verbose", "Be verbose when outputting info") {|t| $verbose = true }
        opt.on("-l FILE", "--logfile FILE", "Log file to analyze") { |f| Apalo.logfile = f }
      end
      @cmd.add_command( CmdParse::HelpCommand.new )
      @cmd.add_command( CmdParse::VersionCommand.new )
      # Built-in commands shipped next to this file.
      Dir["#{File.dirname(__FILE__)}/cli/commands/*.rb"].each do |f|
        load f
      end
      load_plugins
    end

    # Registers a command.
    #
    # cmd         - command name String.
    # description - short description shown in the help output.
    #
    # Yields the command's argument list when the command is executed.
    def self.plugin(cmd, description = 'test')
      c = CmdParse::Command.new( cmd, false, false )
      c.short_desc = description
      c.set_execution_block do |args|
        yield args
      end
      @cmd.add_command c
    end

    # Parses the command line and runs the selected command.
    def self.run
      @cmd.parse
    end

    # Loads user commands from ~/.apalo/commands/*.rb, if that directory
    # exists. A plugin with a syntax error is reported and skipped.
    def self.load_plugins
      if File.directory?("#{ENV['HOME']}/.apalo/commands")
        Dir["#{ENV['HOME']}/.apalo/commands/*.rb"].each do |p|
          begin
            load p
          rescue SyntaxError
            puts "Error loading plugin #{p}"
          end
        end
      end
    end
    # A bare +private+ does not apply to `def self.` methods, so the helper
    # is hidden explicitly.
    private_class_method :load_plugins
  end
end
|
53
|
+
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Apalo
  module Core
    # Reads the file named by Apalo.logfile line by line and yields a LogLine
    # for every line that matches the Apache "combined" format, with or
    # without a trailing "[vhost]" field.
    class LogParser

      attr_reader :processed_lines, :errors, :filtered_lines

      # filter - optional regular expression source (String). It is compiled
      #          and stored in @filter; nothing in this class reads it yet.
      def initialize(filter = nil)
        @processed_lines = 0
        @filtered_lines = 0
        @errors = 0
        @filter = compile(filter) if filter
      end

      # Yields one LogLine per matching line of the log file.
      #
      # The SAME LogLine instance is reused and overwritten for every line, so
      # callers must copy any field they want to keep. Lines that do not match
      # are counted in processed_lines but not yielded. vhost is nil for lines
      # without a trailing "[vhost]" field.
      def each_line
        @logline = LogLine.new
        @regex = compile(line_pattern)
        File.open(Apalo.logfile) do |f|
          f.each_line do |line|
            @processed_lines += 1
            # Captures are read from the returned match object instead of the
            # $1..$10 globals, so both regexp engines are handled alike.
            m = @regex.match(line)
            next unless m
            @logline.ipaddr = m[1]
            @logline.ident = m[2]
            @logline.userid = m[3]
            @logline.time = m[4]
            @logline.request = m[5]
            @logline.rcode = m[6]
            @logline.rsize = m[7]
            @logline.referer = m[8]
            @logline.user_agent = m[9]
            @logline.vhost = m[10]
            @logline.raw = line
            yield @logline
          end
        end
      end

      private

      # Regular expression source for one log line. The nine combined-format
      # fields are mandatory; the trailing "[vhost]" field is optional so
      # plain combined logs are parsed as well.
      def line_pattern
        fields = [
          '(\d+\.\d+\.\d+\.\d+)', # ip
          '(.*?)',                # ident
          '(.*?)',                # userid
          '\[(.*?)\]',            # datetime
          '"(.*?)"',              # request
          '(\d+)',                # code
          '(-|\d+)',              # size
          '"(.*?)"',              # referer
          '"(.*?)"'               # user-agent
        ]
        '^' + fields.join('\s+') + '(?:\s+\[(.*?)\])?$'
      end

      # Compiles +pattern+ with Oniguruma when the gem is available and falls
      # back to the built-in Regexp otherwise. The fallback used to
      # interpolate the array of field patterns instead of the joined
      # pattern, so it never matched a log line.
      def compile(pattern)
        # Lazy require: oniguruma is an optional dependency.
        require 'oniguruma'
        Oniguruma::ORegexp.new(pattern)
      rescue LoadError
        STDERR.puts \
          "WARNING: oniguruma gem not installed. Log analysis will be much slower."
        Regexp.new(pattern)
      end

    end
  end # module Core
end # module Apalo
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Apalo
  module Core
    # Predicates for the strings found in a log line (request line and user
    # agent). Mixed into String at the bottom of this file.
    module LogString
      # Match position (truthy) when the request asks for an image file over
      # HTTP/1.x, nil otherwise.
      def image?
        self =~ /\.(png|PNG|jpg|JPG|gif|GIF|svg|SVG|jpeg|JPEG) HTTP\/1\.\d$/
      end

      # true when the string matches +regex+, false otherwise.
      def test_regex(regex)
        (self =~ regex) ? true : false
      end

      # true when the string contains one of the known crawler names.
      def is_a_bot?
        test_regex %r{^.*(Yahoo! Slurp|Googlebot|TurnitinBot|Twiceler|msnbot|Gigabot|Yandex|TurnitinBot|ia_archiver).*$}
      end

      # Defines is_get?, is_post?, is_options?, is_head?, is_propfind?,
      # is_put? and is_lock?: each is true when the string starts with that
      # HTTP method followed by a space.
      %w[GET POST OPTIONS HEAD PROPFIND PUT LOCK].each do |verb|
        define_method("is_#{verb.downcase}?") do
          test_regex %r{^#{verb} .*$}
        end
      end

    end
  end # module Core
end # end module Apalo

class String
  include Apalo::Core::LogString
end
|
data/lib/apalo/core.rb
ADDED
data/lib/apalo.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'time'
|
2
|
+
require File.dirname(__FILE__) + '/apalo/core'
|
3
|
+
require File.dirname(__FILE__) + '/apalo/cli'
|
4
|
+
|
5
|
+
# Top-level namespace. Holds the gem version and the process-wide state
# shared by the CLI and its command plugins.
module Apalo

  VERSION = '0.0.101'

  class << self
    # Path of the log file to analyse.
    attr_accessor :logfile

    # Shared, lazily created log parser.
    #
    # The class is referenced as Core::LogParser: the unqualified name only
    # resolved when the caller had included Apalo::Core at the top level (as
    # bin/atk does), and raised NameError otherwise.
    def parser
      @parser ||= Core::LogParser.new
    end

    # Shared, lazily created list for parsing errors (initially empty).
    def parsing_errors
      @parsing_errors ||= []
    end
  end

end
|
26
|
+
|
metadata
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rubiojr-apalo
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.101
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sergio RubioSergio Rubio
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-12-09 00:00:00 -08:00
|
13
|
+
default_executable: atk
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: ptools
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.1.6
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: term-ansicolor
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: "1.0"
|
32
|
+
version:
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: cmdparse
|
35
|
+
version_requirement:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.6.5
|
41
|
+
version:
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: hoe
|
44
|
+
version_requirement:
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.8.2
|
50
|
+
version:
|
51
|
+
description: Library and utilities to analyse Apache logs
|
52
|
+
email: sergio@rubio.namesergio@rubio.name
|
53
|
+
executables:
|
54
|
+
- atk
|
55
|
+
extensions: []
|
56
|
+
|
57
|
+
extra_rdoc_files:
|
58
|
+
- History.txt
|
59
|
+
- Manifest.txt
|
60
|
+
- README.txt
|
61
|
+
files:
|
62
|
+
- History.txt
|
63
|
+
- Manifest.txt
|
64
|
+
- README.txt
|
65
|
+
- Rakefile
|
66
|
+
- THANKS
|
67
|
+
- bin/atk
|
68
|
+
- lib/apalo.rb
|
69
|
+
- lib/apalo/cli.rb
|
70
|
+
- lib/apalo/cli/commands/basic.rb
|
71
|
+
- lib/apalo/cli/commands/hit_counter.rb
|
72
|
+
- lib/apalo/cli/commands/monthly_stats.rb
|
73
|
+
- lib/apalo/cli/commands/user_agents.rb
|
74
|
+
- lib/apalo/cli/commands/vhost_stats.rb
|
75
|
+
- lib/apalo/cli/commands/vhosts_overview.rb
|
76
|
+
- lib/apalo/core.rb
|
77
|
+
- lib/apalo/core/log_line.rb
|
78
|
+
- lib/apalo/core/log_parser.rb
|
79
|
+
- lib/apalo/core/log_string.rb
|
80
|
+
has_rdoc: true
|
81
|
+
homepage: http://github.com/rubiojr/apalo
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options:
|
84
|
+
- --main
|
85
|
+
- README.txt
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: "0"
|
93
|
+
version:
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: "0"
|
99
|
+
version:
|
100
|
+
requirements: []
|
101
|
+
|
102
|
+
rubyforge_project: apalo
|
103
|
+
rubygems_version: 1.2.0
|
104
|
+
signing_key:
|
105
|
+
specification_version: 2
|
106
|
+
summary: Apache Logs Toolkit
|
107
|
+
test_files: []
|
108
|
+
|