vizi_tracker 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +116 -0
- data/Rakefile +44 -0
- data/config/logger-backup.yml +49 -0
- data/config/logger.yml +69 -0
- data/config/logger_apache.yml +65 -0
- data/config/logger_sample.yml +69 -0
- data/data/exlog.log +5458 -0
- data/data/sample-alter.log +11870 -0
- data/data/sample-surf.log +47 -0
- data/data/sample-wle.log +30474 -0
- data/data/testlog.log +270 -0
- data/data/vizitrax.log +17951 -0
- data/doc/Object.html +200 -0
- data/doc/ParserTest.html +268 -0
- data/doc/README_rdoc.html +128 -0
- data/doc/Rakefile.html +148 -0
- data/doc/Visit.html +487 -0
- data/doc/VisitList.html +385 -0
- data/doc/Vizi/LogFormat.html +377 -0
- data/doc/Vizi/LogParser.html +551 -0
- data/doc/Vizi/Visit.html +487 -0
- data/doc/Vizi/VisitList.html +386 -0
- data/doc/Vizi.html +168 -0
- data/doc/ViziLogFormat.html +382 -0
- data/doc/ViziLogParser.html +551 -0
- data/doc/created.rid +8 -0
- data/doc/formats/apache-custom-log.pdf +0 -0
- data/doc/formats/apache.rtf +238 -0
- data/doc/formats/format-descriptions.xls +0 -0
- data/doc/formats/w3c-extended.pdf +0 -0
- data/doc/formats/w3c-extended.rtf +135 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +112 -0
- data/doc/js/darkfish.js +116 -0
- data/doc/js/jquery.js +32 -0
- data/doc/js/quicksearch.js +114 -0
- data/doc/js/thickbox-compressed.js +10 -0
- data/doc/lib/vizi/parser_rb.html +63 -0
- data/doc/lib/vizi/vizi_tracker_rb.html +63 -0
- data/doc/lib/vizi_log_parser_rb.html +56 -0
- data/doc/lib/vizi_tracker_rb.html +56 -0
- data/doc/rdoc.css +759 -0
- data/doc/test/parser_test_rb.html +54 -0
- data/doc/test/test_helper_rb.html +56 -0
- data/doc/testit_rb.html +63 -0
- data/lib/vizi/vizi_tracker.rb +406 -0
- data/lib/vizi_tracker.rb +5 -0
- data/log/parse.log +79 -0
- data/log/system.log +66 -0
- data/test/parser_test.rb +48 -0
- data/test/test_helper.rb +3 -0
- data/testit.rb +105 -0
- data/vizi_tracker.gemspec +21 -0
- metadata +146 -0
data/README.rdoc
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
= ViziTracker
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
|
5
|
+
This gem provides a set of classes to support the parsing of web log files and
|
6
|
+
the creation of Visit records from the individual parsed web log records.
|
7
|
+
|
8
|
+
== Installation
|
9
|
+
|
10
|
+
Just run:
|
11
|
+
|
12
|
+
gem install vizi_tracker
|
13
|
+
|
14
|
+
The following code uses a YAML file to store the configuration attributes that will
|
15
|
+
drive the logging activity. This file (logger.yml) should be set up and stored in the
|
16
|
+
config sub-directory. Refer also to logger_sample.yml for more details on the
|
17
|
+
configuration values.
|
18
|
+
|
19
|
+
== Usage
|
20
|
+
|
21
|
+
require 'vizi_tracker'
|
22
|
+
require 'yaml'
|
23
|
+
require 'logger'
|
24
|
+
|
25
|
+
config = YAML.load_file("config/logger.yml")
|
26
|
+
|
27
|
+
@@download_page_number = config["download_page_number"]
|
28
|
+
@@visit_timeout = config["visit_timeout"]
|
29
|
+
|
30
|
+
# Initialize the log parser
|
31
|
+
parser = Vizi::LogParser.new(config["drop_ips"], config["spider_ips"],
|
32
|
+
config["spider_names"], config["page_urls"], config["hide_urls"],
|
33
|
+
config["homepage"], config["accept_only_homepage"],config["hostname"],
|
34
|
+
config["drop_refers_by_hostname"], config["use_local_time"],
|
35
|
+
config["assigned_numbers"], config["match_page_numbers"])
|
36
|
+
|
37
|
+
syslog = Logger.new('./log/system.log',shift_age = 'weekly')
|
38
|
+
case config["log_level"]
|
39
|
+
when "info"
|
40
|
+
syslog.level = Logger::INFO
|
41
|
+
when "warn"
|
42
|
+
syslog.level = Logger::WARN
|
43
|
+
when "error"
|
44
|
+
syslog.level = Logger::ERROR
|
45
|
+
when "fatal"
|
46
|
+
syslog.level = Logger::FATAL
|
47
|
+
else
|
48
|
+
syslog.level = Logger::DEBUG
|
49
|
+
end
|
50
|
+
syslog.info "starting ... >>> "+Time.now.to_s
|
51
|
+
|
52
|
+
# Open log file for reading
|
53
|
+
File.open('./data/exlog.log', 'r') do |file|
|
54
|
+
vlist = Vizi::VisitList.new
|
55
|
+
rec_count = 0
|
56
|
+
hit_count = 0
|
57
|
+
max_rec_count = 99999
|
58
|
+
max_rec_count = config["max_rec_count"] if config["max_rec_count"]
|
59
|
+
visit_count = 0
|
60
|
+
page_count = 0
|
61
|
+
human_count = 0
|
62
|
+
drop_count = 0
|
63
|
+
spider_count = 0
|
64
|
+
start_time = Time.now
|
65
|
+
logformat = nil
|
66
|
+
# Begin to parse each record
|
67
|
+
while(line = file.gets)
|
68
|
+
parsed_data = parser.parse_line(line, logformat)
|
69
|
+
logformat = parsed_data[:p_logformat]
|
70
|
+
rec_count = rec_count + 1
|
71
|
+
next if parsed_data[:p_linetype] != "V"
|
72
|
+
hit_count = hit_count + 1
|
73
|
+
page_count = page_count + 1 if parsed_data[:p_pageflag]
|
74
|
+
@visit=vlist.find_by_ip(parsed_data[:ip])
|
75
|
+
if @visit.nil?
|
76
|
+
vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
|
77
|
+
parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase],parsed_data[:p_pageid]))
|
78
|
+
@visit=vlist.find_by_ip(parsed_data[:ip])
|
79
|
+
visit_count = visit_count + 1
|
80
|
+
else
|
81
|
+
@visit.update(parsed_data[:datetime],parsed_data[:csuriquery],parsed_data[:timetaken],
|
82
|
+
parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase], parsed_data[:p_pageid])
|
83
|
+
end
|
84
|
+
@visits = vlist.find_expired(@visit.start_dt)
|
85
|
+
if @visits
|
86
|
+
@visits.sendoutput
|
87
|
+
vlist.delete(@visits)
|
88
|
+
human_count = human_count + 1 if @visits.visitortype == "H"
|
89
|
+
drop_count = drop_count + 1 if @visits.visitortype == "D"
|
90
|
+
spider_count = spider_count + 1 if @visits.visitortype == "S"
|
91
|
+
end
|
92
|
+
break if rec_count == max_rec_count
|
93
|
+
end
|
94
|
+
@visits = vlist.find_all
|
95
|
+
@visits.each {|v|
|
96
|
+
v.sendoutput
|
97
|
+
human_count = human_count + 1 if v.visitortype == "H"
|
98
|
+
drop_count = drop_count + 1 if v.visitortype == "D"
|
99
|
+
spider_count = spider_count + 1 if v.visitortype == "S"
|
100
|
+
}
|
101
|
+
if config["summary_flag"]
|
102
|
+
syslog.info "Record count is "+rec_count.to_s
|
103
|
+
syslog.info "Hit count is "+hit_count.to_s
|
104
|
+
syslog.info "Page count is "+page_count.to_s
|
105
|
+
syslog.info "Total visit count is "+visit_count.to_s
|
106
|
+
syslog.info "Human visit count is "+human_count.to_s
|
107
|
+
syslog.info "Drop visit count is "+drop_count.to_s
|
108
|
+
syslog.info "Spider visit count is "+spider_count.to_s
|
109
|
+
syslog.info "Batch processing time "+(Time.now-start_time).to_s
|
110
|
+
end
|
111
|
+
syslog.info "ending ... >>> "+Time.now.to_s
|
112
|
+
end
|
113
|
+
|
114
|
+
== License
|
115
|
+
|
116
|
+
This code is made available under the MIT license. It is based on original parser code from Jan Wikholm.
|
data/Rakefile
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/gempackagetask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
spec = Gem::Specification.new do |s|
|
7
|
+
s.name = "vizi_tracker"
|
8
|
+
s.version = "0.1.0"
|
9
|
+
s.author = "Al Kivi"
|
10
|
+
s.email = "al.kivi at vizitrax.com"
|
11
|
+
s.homepage = "http://github.com/al-kivi/vizi_tracker"
|
12
|
+
s.description = "Univeral web log file parser and visit analyzer"
|
13
|
+
s.summary = "A package for parsing web server logs and creating visit records"
|
14
|
+
|
15
|
+
s.platform = Gem::Platform::RUBY
|
16
|
+
s.has_rdoc = true
|
17
|
+
s.extra_rdoc_files = ["README.rdoc"]
|
18
|
+
|
19
|
+
s.require_path = "lib"
|
20
|
+
s.files = %w(README.rdoc Rakefile) + Dir.glob("lib/**/*")
|
21
|
+
end
|
22
|
+
|
23
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
24
|
+
pkg.need_tar = true
|
25
|
+
end
|
26
|
+
|
27
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
28
|
+
rdoc.rdoc_dir = 'rdoc'
|
29
|
+
rdoc.title = 'ViziTracker'
|
30
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
31
|
+
rdoc.rdoc_files.include('README')
|
32
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
33
|
+
end
|
34
|
+
|
35
|
+
Rake::TestTask.new do |t|
|
36
|
+
t.libs << 'test'
|
37
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
38
|
+
t.verbose = true
|
39
|
+
end
|
40
|
+
|
41
|
+
task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
|
42
|
+
puts "generated latest version"
|
43
|
+
end
|
44
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
visit_timeout: 1200
|
3
|
+
summary_flag: true
|
4
|
+
convert_to_lower_case: true
|
5
|
+
#max_rec_count: 100
|
6
|
+
homepage: home.aspx
|
7
|
+
accept_only_homepage: true
|
8
|
+
page_extensions:
|
9
|
+
- aspx
|
10
|
+
- asp
|
11
|
+
hostname: www.sigma-systems.com
|
12
|
+
drop_refers_by_hostname: true
|
13
|
+
drop_ips:
|
14
|
+
- 76.12.185.100
|
15
|
+
spider_ips:
|
16
|
+
- 66.98.254.55
|
17
|
+
- 64.208.168.252
|
18
|
+
- 64.235.108.183
|
19
|
+
- 76.2.144.115
|
20
|
+
- 66.98.254.236
|
21
|
+
- 202.108.22.132
|
22
|
+
- 89.122.29.77
|
23
|
+
- 95.174.93.222
|
24
|
+
- 66.55.37.179
|
25
|
+
- 198.45.18.20
|
26
|
+
- 38.104.227.3
|
27
|
+
spider_names:
|
28
|
+
- bot
|
29
|
+
- spider
|
30
|
+
- slurp
|
31
|
+
- root.exe
|
32
|
+
- .dll
|
33
|
+
- slurp
|
34
|
+
- looksmart
|
35
|
+
- nutchsvc
|
36
|
+
- iconsurf
|
37
|
+
- objectsearch
|
38
|
+
- openfind
|
39
|
+
- iltovatore
|
40
|
+
- mozdex
|
41
|
+
- netresearch
|
42
|
+
- konsqueror
|
43
|
+
- crawler
|
44
|
+
- searchme
|
45
|
+
- java/1.6.0_04
|
46
|
+
- scoutjet
|
47
|
+
- yeti
|
48
|
+
- yandex
|
49
|
+
drop_spiders: true
|
data/config/logger.yml
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
#max_rec_count: 100
|
3
|
+
visit_timeout: 1200
|
4
|
+
summary_flag: true
|
5
|
+
convert_to_lower_case: true
|
6
|
+
log_level: warn
|
7
|
+
#log_level options are debug, info, warn, error, fatal
|
8
|
+
homepage: /home.aspx
|
9
|
+
#homepage: /
|
10
|
+
accept_only_homepage: true
|
11
|
+
page_urls:
|
12
|
+
- aspx
|
13
|
+
- asp
|
14
|
+
hide_urls:
|
15
|
+
- css
|
16
|
+
- js
|
17
|
+
- gif
|
18
|
+
- swf
|
19
|
+
- ico
|
20
|
+
- chart
|
21
|
+
- robots
|
22
|
+
hostname: www.sigma-systems.com
|
23
|
+
drop_refers_by_hostname: true
|
24
|
+
use_local_time: true
|
25
|
+
download_page_number: 45
|
26
|
+
drop_ips:
|
27
|
+
- 76.12.185.100
|
28
|
+
spider_ips:
|
29
|
+
- 66.98.254.55
|
30
|
+
- 64.208.168.252
|
31
|
+
- 64.235.108.183
|
32
|
+
- 76.2.144.115
|
33
|
+
- 66.98.254.236
|
34
|
+
- 202.108.22.132
|
35
|
+
- 89.122.29.77
|
36
|
+
- 95.174.93.222
|
37
|
+
- 66.55.37.179
|
38
|
+
- 198.45.18.20
|
39
|
+
- 38.104.227.3
|
40
|
+
spider_names:
|
41
|
+
- bot
|
42
|
+
- spider
|
43
|
+
- slurp
|
44
|
+
- root.exe
|
45
|
+
- .dll
|
46
|
+
- slurp
|
47
|
+
- looksmart
|
48
|
+
- nutchsvc
|
49
|
+
- iconsurf
|
50
|
+
- objectsearch
|
51
|
+
- openfind
|
52
|
+
- iltovatore
|
53
|
+
- mozdex
|
54
|
+
- netresearch
|
55
|
+
- konsqueror
|
56
|
+
- crawler
|
57
|
+
- searchme
|
58
|
+
- java/1.6.0_04
|
59
|
+
- scoutjet
|
60
|
+
- yeti
|
61
|
+
- yandex
|
62
|
+
# convert urls to assigned numbers where numbers cannot be parsed from url
|
63
|
+
assigned_numbers:
|
64
|
+
- /visit/index,1
|
65
|
+
- /visit/show/,2
|
66
|
+
- /visit/vcardedit/,3
|
67
|
+
- /visit/showmap_na,4
|
68
|
+
- /visit/showmap_row,5
|
69
|
+
match_page_numbers: false
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
#max_rec_count: 100
|
3
|
+
visit_timeout: 1200
|
4
|
+
summary_flag: true
|
5
|
+
convert_to_lower_case: true
|
6
|
+
#homepage: /home.aspx
|
7
|
+
homepage: /
|
8
|
+
accept_only_homepage: true
|
9
|
+
page_urls:
|
10
|
+
- aspx
|
11
|
+
- asp
|
12
|
+
hide_urls:
|
13
|
+
- css
|
14
|
+
- js
|
15
|
+
- gif
|
16
|
+
- swf
|
17
|
+
- ico
|
18
|
+
- chart
|
19
|
+
- robots
|
20
|
+
hostname: www.sigma-systems.com
|
21
|
+
drop_refers_by_hostname: true
|
22
|
+
getlocal_time: true
|
23
|
+
drop_ips:
|
24
|
+
- 76.12.185.100
|
25
|
+
spider_ips:
|
26
|
+
- 66.98.254.55
|
27
|
+
- 64.208.168.252
|
28
|
+
- 64.235.108.183
|
29
|
+
- 76.2.144.115
|
30
|
+
- 66.98.254.236
|
31
|
+
- 202.108.22.132
|
32
|
+
- 89.122.29.77
|
33
|
+
- 95.174.93.222
|
34
|
+
- 66.55.37.179
|
35
|
+
- 198.45.18.20
|
36
|
+
- 38.104.227.3
|
37
|
+
spider_names:
|
38
|
+
- bot
|
39
|
+
- spider
|
40
|
+
- slurp
|
41
|
+
- root.exe
|
42
|
+
- .dll
|
43
|
+
- slurp
|
44
|
+
- looksmart
|
45
|
+
- nutchsvc
|
46
|
+
- iconsurf
|
47
|
+
- objectsearch
|
48
|
+
- openfind
|
49
|
+
- iltovatore
|
50
|
+
- mozdex
|
51
|
+
- netresearch
|
52
|
+
- konsqueror
|
53
|
+
- crawler
|
54
|
+
- searchme
|
55
|
+
- java/1.6.0_04
|
56
|
+
- scoutjet
|
57
|
+
- yeti
|
58
|
+
- yandex
|
59
|
+
page_numbers:
|
60
|
+
- /visit/index,1
|
61
|
+
- /visit/show/,2
|
62
|
+
- /visit/vcardedit/,3
|
63
|
+
- /visit/showmap_na,4
|
64
|
+
- /visit/showmap_row,5
|
65
|
+
match_page_numbers: true
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Configuration data for logger
|
2
|
+
#max_rec_count: 100
|
3
|
+
visit_timeout: 1200
|
4
|
+
summary_flag: true
|
5
|
+
convert_to_lower_case: true
|
6
|
+
log_level: warn
|
7
|
+
#log_level options are debug, info, warn, error, fatal
|
8
|
+
homepage: /home.aspx
|
9
|
+
#homepage: /
|
10
|
+
accept_only_homepage: true
|
11
|
+
page_urls:
|
12
|
+
- aspx
|
13
|
+
- asp
|
14
|
+
hide_urls:
|
15
|
+
- css
|
16
|
+
- js
|
17
|
+
- gif
|
18
|
+
- swf
|
19
|
+
- ico
|
20
|
+
- chart
|
21
|
+
- robots
|
22
|
+
hostname: www.sigma-systems.com
|
23
|
+
drop_refers_by_hostname: true
|
24
|
+
use_local_time: true
|
25
|
+
download_page_number: 45
|
26
|
+
drop_ips:
|
27
|
+
- 76.12.185.100
|
28
|
+
spider_ips:
|
29
|
+
- 66.98.254.55
|
30
|
+
- 64.208.168.252
|
31
|
+
- 64.235.108.183
|
32
|
+
- 76.2.144.115
|
33
|
+
- 66.98.254.236
|
34
|
+
- 202.108.22.132
|
35
|
+
- 89.122.29.77
|
36
|
+
- 95.174.93.222
|
37
|
+
- 66.55.37.179
|
38
|
+
- 198.45.18.20
|
39
|
+
- 38.104.227.3
|
40
|
+
spider_names:
|
41
|
+
- bot
|
42
|
+
- spider
|
43
|
+
- slurp
|
44
|
+
- root.exe
|
45
|
+
- .dll
|
46
|
+
- slurp
|
47
|
+
- looksmart
|
48
|
+
- nutchsvc
|
49
|
+
- iconsurf
|
50
|
+
- objectsearch
|
51
|
+
- openfind
|
52
|
+
- iltovatore
|
53
|
+
- mozdex
|
54
|
+
- netresearch
|
55
|
+
- konsqueror
|
56
|
+
- crawler
|
57
|
+
- searchme
|
58
|
+
- java/1.6.0_04
|
59
|
+
- scoutjet
|
60
|
+
- yeti
|
61
|
+
- yandex
|
62
|
+
# convert urls to assigned numbers where numbers cannot be parsed from url
|
63
|
+
assigned_numbers:
|
64
|
+
- /visit/index,1
|
65
|
+
- /visit/show/,2
|
66
|
+
- /visit/vcardedit/,3
|
67
|
+
- /visit/showmap_na,4
|
68
|
+
- /visit/showmap_row,5
|
69
|
+
match_page_numbers: false
|