vizi_tracker 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +116 -0
- data/Rakefile +44 -0
- data/config/logger-backup.yml +49 -0
- data/config/logger.yml +69 -0
- data/config/logger_apache.yml +65 -0
- data/config/logger_sample.yml +69 -0
- data/data/exlog.log +5458 -0
- data/data/sample-alter.log +11870 -0
- data/data/sample-surf.log +47 -0
- data/data/sample-wle.log +30474 -0
- data/data/testlog.log +270 -0
- data/data/vizitrax.log +17951 -0
- data/doc/Object.html +200 -0
- data/doc/ParserTest.html +268 -0
- data/doc/README_rdoc.html +128 -0
- data/doc/Rakefile.html +148 -0
- data/doc/Visit.html +487 -0
- data/doc/VisitList.html +385 -0
- data/doc/Vizi/LogFormat.html +377 -0
- data/doc/Vizi/LogParser.html +551 -0
- data/doc/Vizi/Visit.html +487 -0
- data/doc/Vizi/VisitList.html +386 -0
- data/doc/Vizi.html +168 -0
- data/doc/ViziLogFormat.html +382 -0
- data/doc/ViziLogParser.html +551 -0
- data/doc/created.rid +8 -0
- data/doc/formats/apache-custom-log.pdf +0 -0
- data/doc/formats/apache.rtf +238 -0
- data/doc/formats/format-descriptions.xls +0 -0
- data/doc/formats/w3c-extended.pdf +0 -0
- data/doc/formats/w3c-extended.rtf +135 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +112 -0
- data/doc/js/darkfish.js +116 -0
- data/doc/js/jquery.js +32 -0
- data/doc/js/quicksearch.js +114 -0
- data/doc/js/thickbox-compressed.js +10 -0
- data/doc/lib/vizi/parser_rb.html +63 -0
- data/doc/lib/vizi/vizi_tracker_rb.html +63 -0
- data/doc/lib/vizi_log_parser_rb.html +56 -0
- data/doc/lib/vizi_tracker_rb.html +56 -0
- data/doc/rdoc.css +759 -0
- data/doc/test/parser_test_rb.html +54 -0
- data/doc/test/test_helper_rb.html +56 -0
- data/doc/testit_rb.html +63 -0
- data/lib/vizi/vizi_tracker.rb +406 -0
- data/lib/vizi_tracker.rb +5 -0
- data/log/parse.log +79 -0
- data/log/system.log +66 -0
- data/test/parser_test.rb +48 -0
- data/test/test_helper.rb +3 -0
- data/testit.rb +105 -0
- data/vizi_tracker.gemspec +21 -0
- metadata +146 -0
data/README.rdoc
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
= ViziTracker
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
|
5
|
+
This gem provides a set of classes to support the parsing of web log files and
|
6
|
+
the creation of Visit records from the individual parsed web log records.
|
7
|
+
|
8
|
+
== Installation
|
9
|
+
|
10
|
+
Just run:
|
11
|
+
|
12
|
+
gem install vizi_tracker
|
13
|
+
|
14
|
+
The following code uses a YAML file to store the configuration attributes that will
|
15
|
+
drive the logging activity. This file (logger.yml) should be set up and stored in the
|
16
|
+
config sub-directory. Refer also to logger_sample.yml for more details on the
|
17
|
+
configuration values.
|
18
|
+
|
19
|
+
== Usage
|
20
|
+
|
21
|
+
require 'vizi_tracker'
|
22
|
+
require 'yaml'
|
23
|
+
require 'logger'
|
24
|
+
|
25
|
+
config = YAML.load_file("config/logger.yml")
|
26
|
+
|
27
|
+
@@download_page_number = config["download_page_number"]
|
28
|
+
@@visit_timeout = config["visit_timeout"]
|
29
|
+
|
30
|
+
# Initialize the log parser
|
31
|
+
parser = Vizi::LogParser.new(config["drop_ips"], config["spider_ips"],
|
32
|
+
config["spider_names"], config["page_urls"], config["hide_urls"],
|
33
|
+
config["homepage"], config["accept_only_homepage"],config["hostname"],
|
34
|
+
config["drop_refers_by_hostname"], config["use_local_time"],
|
35
|
+
config["assigned_numbers"], config["match_page_numbers"])
|
36
|
+
|
37
|
+
syslog = Logger.new('./log/system.log',shift_age = 'weekly')
|
38
|
+
case config["log_level"]
|
39
|
+
when "info"
|
40
|
+
syslog.level = Logger::INFO
|
41
|
+
when "warn"
|
42
|
+
syslog.level = Logger::WARN
|
43
|
+
when "error"
|
44
|
+
syslog.level = Logger::ERROR
|
45
|
+
when "fatal"
|
46
|
+
syslog.level = Logger::FATAL
|
47
|
+
else
|
48
|
+
syslog.level = Logger::DEBUG
|
49
|
+
end
|
50
|
+
syslog.info "starting ... >>> "+Time.now.to_s
|
51
|
+
|
52
|
+
# Open log file for reading
|
53
|
+
File.open('./data/exlog.log', 'r') do |file|
|
54
|
+
vlist = Vizi::VisitList.new
|
55
|
+
rec_count = 0
|
56
|
+
hit_count = 0
|
57
|
+
max_rec_count = 99999
|
58
|
+
max_rec_count = config["max_rec_count"] if config["max_rec_count"]
|
59
|
+
visit_count = 0
|
60
|
+
page_count = 0
|
61
|
+
human_count = 0
|
62
|
+
drop_count = 0
|
63
|
+
spider_count = 0
|
64
|
+
start_time = Time.now
|
65
|
+
logformat = nil
|
66
|
+
# Begin to parse each record
|
67
|
+
while(line = file.gets)
|
68
|
+
parsed_data = parser.parse_line(line, logformat)
|
69
|
+
logformat = parsed_data[:p_logformat]
|
70
|
+
rec_count = rec_count + 1
|
71
|
+
next if parsed_data[:p_linetype] != "V"
|
72
|
+
hit_count = hit_count + 1
|
73
|
+
page_count = page_count + 1 if parsed_data[:p_pageflag]
|
74
|
+
@visit=vlist.find_by_ip(parsed_data[:ip])
|
75
|
+
if @visit.nil?
|
76
|
+
vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
|
77
|
+
parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase],parsed_data[:p_pageid]))
|
78
|
+
@visit=vlist.find_by_ip(parsed_data[:ip])
|
79
|
+
visit_count = visit_count + 1
|
80
|
+
else
|
81
|
+
@visit.update(parsed_data[:datetime],parsed_data[:csuriquery],parsed_data[:timetaken],
|
82
|
+
parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase], parsed_data[:p_pageid])
|
83
|
+
end
|
84
|
+
@visits = vlist.find_expired(@visit.start_dt)
|
85
|
+
if @visits
|
86
|
+
@visits.sendoutput
|
87
|
+
vlist.delete(@visits)
|
88
|
+
human_count = human_count + 1 if @visits.visitortype == "H"
|
89
|
+
drop_count = drop_count + 1 if @visits.visitortype == "D"
|
90
|
+
spider_count = spider_count + 1 if @visits.visitortype == "S"
|
91
|
+
end
|
92
|
+
break if rec_count == max_rec_count
|
93
|
+
end
|
94
|
+
@visits = vlist.find_all
|
95
|
+
@visits.each {|v|
|
96
|
+
v.sendoutput
|
97
|
+
human_count = human_count + 1 if v.visitortype == "H"
|
98
|
+
drop_count = drop_count + 1 if v.visitortype == "D"
|
99
|
+
spider_count = spider_count + 1 if v.visitortype == "S"
|
100
|
+
}
|
101
|
+
if config["summary_flag"]
|
102
|
+
syslog.info "Record count is "+rec_count.to_s
|
103
|
+
syslog.info "Hit count is "+hit_count.to_s
|
104
|
+
syslog.info "Page count is "+page_count.to_s
|
105
|
+
syslog.info "Total visit count is "+visit_count.to_s
|
106
|
+
syslog.info "Human visit count is "+human_count.to_s
|
107
|
+
syslog.info "Drop visit count is "+drop_count.to_s
|
108
|
+
syslog.info "Spider visit count is "+spider_count.to_s
|
109
|
+
syslog.info "Batch processing time "+(Time.now-start_time).to_s
|
110
|
+
end
|
111
|
+
syslog.info "ending ... >>> "+Time.now.to_s
|
112
|
+
end
|
113
|
+
|
114
|
+
== License
|
115
|
+
|
116
|
+
This code is made available under the MIT license. It is based on original parser code from Jan Wikholm.
|
data/Rakefile
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/gempackagetask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
spec = Gem::Specification.new do |s|
|
7
|
+
s.name = "vizi_tracker"
|
8
|
+
s.version = "0.1.0"
|
9
|
+
s.author = "Al Kivi"
|
10
|
+
s.email = "al.kivi at vizitrax.com"
|
11
|
+
s.homepage = "http://github.com/al-kivi/vizi_tracker"
|
12
|
+
s.description = "Univeral web log file parser and visit analyzer"
|
13
|
+
s.summary = "A package for parsing web server logs and creating visit records"
|
14
|
+
|
15
|
+
s.platform = Gem::Platform::RUBY
|
16
|
+
s.has_rdoc = true
|
17
|
+
s.extra_rdoc_files = ["README.rdoc"]
|
18
|
+
|
19
|
+
s.require_path = "lib"
|
20
|
+
s.files = %w(README.rdoc Rakefile) + Dir.glob("lib/**/*")
|
21
|
+
end
|
22
|
+
|
23
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
24
|
+
pkg.need_tar = true
|
25
|
+
end
|
26
|
+
|
27
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
28
|
+
rdoc.rdoc_dir = 'rdoc'
|
29
|
+
rdoc.title = 'ViziTracker'
|
30
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
31
|
+
rdoc.rdoc_files.include('README')
|
32
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
33
|
+
end
|
34
|
+
|
35
|
+
Rake::TestTask.new do |t|
|
36
|
+
t.libs << 'test'
|
37
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
38
|
+
t.verbose = true
|
39
|
+
end
|
40
|
+
|
41
|
+
task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
|
42
|
+
puts "generated latest version"
|
43
|
+
end
|
44
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
visit_timeout: 1200
|
3
|
+
summary_flag: true
|
4
|
+
convert_to_lower_case: true
|
5
|
+
#max_rec_count: 100
|
6
|
+
homepage: home.aspx
|
7
|
+
accept_only_homepage: true
|
8
|
+
page_extensions:
|
9
|
+
- aspx
|
10
|
+
- asp
|
11
|
+
hostname: www.sigma-systems.com
|
12
|
+
drop_refers_by_hostname: true
|
13
|
+
drop_ips:
|
14
|
+
- 76.12.185.100
|
15
|
+
spider_ips:
|
16
|
+
- 66.98.254.55
|
17
|
+
- 64.208.168.252
|
18
|
+
- 64.235.108.183
|
19
|
+
- 76.2.144.115
|
20
|
+
- 66.98.254.236
|
21
|
+
- 202.108.22.132
|
22
|
+
- 89.122.29.77
|
23
|
+
- 95.174.93.222
|
24
|
+
- 66.55.37.179
|
25
|
+
- 198.45.18.20
|
26
|
+
- 38.104.227.3
|
27
|
+
spider_names:
|
28
|
+
- bot
|
29
|
+
- spider
|
30
|
+
- slurp
|
31
|
+
- root.exe
|
32
|
+
- .dll
|
33
|
+
- slurp
|
34
|
+
- looksmart
|
35
|
+
- nutchsvc
|
36
|
+
- iconsurf
|
37
|
+
- objectsearch
|
38
|
+
- openfind
|
39
|
+
- iltovatore
|
40
|
+
- mozdex
|
41
|
+
- netresearch
|
42
|
+
- konsqueror
|
43
|
+
- crawler
|
44
|
+
- searchme
|
45
|
+
- java/1.6.0_04
|
46
|
+
- scoutjet
|
47
|
+
- yeti
|
48
|
+
- yandex
|
49
|
+
drop_spiders: true
|
data/config/logger.yml
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
#max_rec_count: 100
|
3
|
+
visit_timeout: 1200
|
4
|
+
summary_flag: true
|
5
|
+
convert_to_lower_case: true
|
6
|
+
log_level: warn
|
7
|
+
#log_level options are debug, info, warn, error, fatal
|
8
|
+
homepage: /home.aspx
|
9
|
+
#homepage: /
|
10
|
+
accept_only_homepage: true
|
11
|
+
page_urls:
|
12
|
+
- aspx
|
13
|
+
- asp
|
14
|
+
hide_urls:
|
15
|
+
- css
|
16
|
+
- js
|
17
|
+
- gif
|
18
|
+
- swf
|
19
|
+
- ico
|
20
|
+
- chart
|
21
|
+
- robots
|
22
|
+
hostname: www.sigma-systems.com
|
23
|
+
drop_refers_by_hostname: true
|
24
|
+
use_local_time: true
|
25
|
+
download_page_number: 45
|
26
|
+
drop_ips:
|
27
|
+
- 76.12.185.100
|
28
|
+
spider_ips:
|
29
|
+
- 66.98.254.55
|
30
|
+
- 64.208.168.252
|
31
|
+
- 64.235.108.183
|
32
|
+
- 76.2.144.115
|
33
|
+
- 66.98.254.236
|
34
|
+
- 202.108.22.132
|
35
|
+
- 89.122.29.77
|
36
|
+
- 95.174.93.222
|
37
|
+
- 66.55.37.179
|
38
|
+
- 198.45.18.20
|
39
|
+
- 38.104.227.3
|
40
|
+
spider_names:
|
41
|
+
- bot
|
42
|
+
- spider
|
43
|
+
- slurp
|
44
|
+
- root.exe
|
45
|
+
- .dll
|
46
|
+
- slurp
|
47
|
+
- looksmart
|
48
|
+
- nutchsvc
|
49
|
+
- iconsurf
|
50
|
+
- objectsearch
|
51
|
+
- openfind
|
52
|
+
- iltovatore
|
53
|
+
- mozdex
|
54
|
+
- netresearch
|
55
|
+
- konsqueror
|
56
|
+
- crawler
|
57
|
+
- searchme
|
58
|
+
- java/1.6.0_04
|
59
|
+
- scoutjet
|
60
|
+
- yeti
|
61
|
+
- yandex
|
62
|
+
# convert urls to assigned numbers where numbers cannot be parsed from url
|
63
|
+
assigned_numbers:
|
64
|
+
- /visit/index,1
|
65
|
+
- /visit/show/,2
|
66
|
+
- /visit/vcardedit/,3
|
67
|
+
- /visit/showmap_na,4
|
68
|
+
- /visit/showmap_row,5
|
69
|
+
match_page_numbers: false
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
#max_rec_count: 100
|
3
|
+
visit_timeout: 1200
|
4
|
+
summary_flag: true
|
5
|
+
convert_to_lower_case: true
|
6
|
+
#homepage: /home.aspx
|
7
|
+
homepage: /
|
8
|
+
accept_only_homepage: true
|
9
|
+
page_urls:
|
10
|
+
- aspx
|
11
|
+
- asp
|
12
|
+
hide_urls:
|
13
|
+
- css
|
14
|
+
- js
|
15
|
+
- gif
|
16
|
+
- swf
|
17
|
+
- ico
|
18
|
+
- chart
|
19
|
+
- robots
|
20
|
+
hostname: www.sigma-systems.com
|
21
|
+
drop_refers_by_hostname: true
|
22
|
+
getlocal_time: true
|
23
|
+
drop_ips:
|
24
|
+
- 76.12.185.100
|
25
|
+
spider_ips:
|
26
|
+
- 66.98.254.55
|
27
|
+
- 64.208.168.252
|
28
|
+
- 64.235.108.183
|
29
|
+
- 76.2.144.115
|
30
|
+
- 66.98.254.236
|
31
|
+
- 202.108.22.132
|
32
|
+
- 89.122.29.77
|
33
|
+
- 95.174.93.222
|
34
|
+
- 66.55.37.179
|
35
|
+
- 198.45.18.20
|
36
|
+
- 38.104.227.3
|
37
|
+
spider_names:
|
38
|
+
- bot
|
39
|
+
- spider
|
40
|
+
- slurp
|
41
|
+
- root.exe
|
42
|
+
- .dll
|
43
|
+
- slurp
|
44
|
+
- looksmart
|
45
|
+
- nutchsvc
|
46
|
+
- iconsurf
|
47
|
+
- objectsearch
|
48
|
+
- openfind
|
49
|
+
- iltovatore
|
50
|
+
- mozdex
|
51
|
+
- netresearch
|
52
|
+
- konsqueror
|
53
|
+
- crawler
|
54
|
+
- searchme
|
55
|
+
- java/1.6.0_04
|
56
|
+
- scoutjet
|
57
|
+
- yeti
|
58
|
+
- yandex
|
59
|
+
page_numbers:
|
60
|
+
- /visit/index,1
|
61
|
+
- /visit/show/,2
|
62
|
+
- /visit/vcardedit/,3
|
63
|
+
- /visit/showmap_na,4
|
64
|
+
- /visit/showmap_row,5
|
65
|
+
match_page_numbers: true
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
#max_rec_count: 100
|
3
|
+
visit_timeout: 1200
|
4
|
+
summary_flag: true
|
5
|
+
convert_to_lower_case: true
|
6
|
+
log_level: warn
|
7
|
+
#log_level options are debug, info, warn, error, fatal
|
8
|
+
homepage: /home.aspx
|
9
|
+
#homepage: /
|
10
|
+
accept_only_homepage: true
|
11
|
+
page_urls:
|
12
|
+
- aspx
|
13
|
+
- asp
|
14
|
+
hide_urls:
|
15
|
+
- css
|
16
|
+
- js
|
17
|
+
- gif
|
18
|
+
- swf
|
19
|
+
- ico
|
20
|
+
- chart
|
21
|
+
- robots
|
22
|
+
hostname: www.sigma-systems.com
|
23
|
+
drop_refers_by_hostname: true
|
24
|
+
use_local_time: true
|
25
|
+
download_page_number: 45
|
26
|
+
drop_ips:
|
27
|
+
- 76.12.185.100
|
28
|
+
spider_ips:
|
29
|
+
- 66.98.254.55
|
30
|
+
- 64.208.168.252
|
31
|
+
- 64.235.108.183
|
32
|
+
- 76.2.144.115
|
33
|
+
- 66.98.254.236
|
34
|
+
- 202.108.22.132
|
35
|
+
- 89.122.29.77
|
36
|
+
- 95.174.93.222
|
37
|
+
- 66.55.37.179
|
38
|
+
- 198.45.18.20
|
39
|
+
- 38.104.227.3
|
40
|
+
spider_names:
|
41
|
+
- bot
|
42
|
+
- spider
|
43
|
+
- slurp
|
44
|
+
- root.exe
|
45
|
+
- .dll
|
46
|
+
- slurp
|
47
|
+
- looksmart
|
48
|
+
- nutchsvc
|
49
|
+
- iconsurf
|
50
|
+
- objectsearch
|
51
|
+
- openfind
|
52
|
+
- iltovatore
|
53
|
+
- mozdex
|
54
|
+
- netresearch
|
55
|
+
- konsqueror
|
56
|
+
- crawler
|
57
|
+
- searchme
|
58
|
+
- java/1.6.0_04
|
59
|
+
- scoutjet
|
60
|
+
- yeti
|
61
|
+
- yandex
|
62
|
+
# convert urls to assigned numbers where numbers cannot be parsed from url
|
63
|
+
assigned_numbers:
|
64
|
+
- /visit/index,1
|
65
|
+
- /visit/show/,2
|
66
|
+
- /visit/vcardedit/,3
|
67
|
+
- /visit/showmap_na,4
|
68
|
+
- /visit/showmap_row,5
|
69
|
+
match_page_numbers: false
|