vizi_tracker 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/README.rdoc +116 -0
  2. data/Rakefile +44 -0
  3. data/config/logger-backup.yml +49 -0
  4. data/config/logger.yml +69 -0
  5. data/config/logger_apache.yml +65 -0
  6. data/config/logger_sample.yml +69 -0
  7. data/data/exlog.log +5458 -0
  8. data/data/sample-alter.log +11870 -0
  9. data/data/sample-surf.log +47 -0
  10. data/data/sample-wle.log +30474 -0
  11. data/data/testlog.log +270 -0
  12. data/data/vizitrax.log +17951 -0
  13. data/doc/Object.html +200 -0
  14. data/doc/ParserTest.html +268 -0
  15. data/doc/README_rdoc.html +128 -0
  16. data/doc/Rakefile.html +148 -0
  17. data/doc/Visit.html +487 -0
  18. data/doc/VisitList.html +385 -0
  19. data/doc/Vizi/LogFormat.html +377 -0
  20. data/doc/Vizi/LogParser.html +551 -0
  21. data/doc/Vizi/Visit.html +487 -0
  22. data/doc/Vizi/VisitList.html +386 -0
  23. data/doc/Vizi.html +168 -0
  24. data/doc/ViziLogFormat.html +382 -0
  25. data/doc/ViziLogParser.html +551 -0
  26. data/doc/created.rid +8 -0
  27. data/doc/formats/apache-custom-log.pdf +0 -0
  28. data/doc/formats/apache.rtf +238 -0
  29. data/doc/formats/format-descriptions.xls +0 -0
  30. data/doc/formats/w3c-extended.pdf +0 -0
  31. data/doc/formats/w3c-extended.rtf +135 -0
  32. data/doc/images/brick.png +0 -0
  33. data/doc/images/brick_link.png +0 -0
  34. data/doc/images/bug.png +0 -0
  35. data/doc/images/bullet_black.png +0 -0
  36. data/doc/images/bullet_toggle_minus.png +0 -0
  37. data/doc/images/bullet_toggle_plus.png +0 -0
  38. data/doc/images/date.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_green.png +0 -0
  49. data/doc/images/wrench.png +0 -0
  50. data/doc/images/wrench_orange.png +0 -0
  51. data/doc/images/zoom.png +0 -0
  52. data/doc/index.html +112 -0
  53. data/doc/js/darkfish.js +116 -0
  54. data/doc/js/jquery.js +32 -0
  55. data/doc/js/quicksearch.js +114 -0
  56. data/doc/js/thickbox-compressed.js +10 -0
  57. data/doc/lib/vizi/parser_rb.html +63 -0
  58. data/doc/lib/vizi/vizi_tracker_rb.html +63 -0
  59. data/doc/lib/vizi_log_parser_rb.html +56 -0
  60. data/doc/lib/vizi_tracker_rb.html +56 -0
  61. data/doc/rdoc.css +759 -0
  62. data/doc/test/parser_test_rb.html +54 -0
  63. data/doc/test/test_helper_rb.html +56 -0
  64. data/doc/testit_rb.html +63 -0
  65. data/lib/vizi/vizi_tracker.rb +406 -0
  66. data/lib/vizi_tracker.rb +5 -0
  67. data/log/parse.log +79 -0
  68. data/log/system.log +66 -0
  69. data/test/parser_test.rb +48 -0
  70. data/test/test_helper.rb +3 -0
  71. data/testit.rb +105 -0
  72. data/vizi_tracker.gemspec +21 -0
  73. metadata +146 -0
data/README.rdoc ADDED
@@ -0,0 +1,116 @@
1
+ = ViziTracker
2
+
3
+ == Introduction
4
+
5
+ This gem provides a set of classes to support the parsing of web log files and
6
+ the creation of Visit records from the individual parsed web log records.
7
+
8
+ == Installation
9
+
10
+ Just run:
11
+
12
+ gem install vizi_tracker
13
+
14
+ The following code uses a YAML file to store the configuration attributes that will
15
+ drive the logging activity. This file (logger.yml) should be set up and stored in the
16
+ config sub-directory. Refer also to logger_sample.yml for more details on the
17
+ configuration values.
18
+
19
+ == Usage
20
+
21
+ require 'vizi_tracker'
22
+ require 'yaml'
23
+ require 'logger'
24
+
25
+ config = YAML.load_file("config/logger.yml")
26
+
27
+ @@download_page_number = config["download_page_number"]
28
+ @@visit_timeout = config["visit_timeout"]
29
+
30
+ # Initialize the log parser
31
+ parser = Vizi::LogParser.new(config["drop_ips"], config["spider_ips"],
32
+ config["spider_names"], config["page_urls"], config["hide_urls"],
33
+ config["homepage"], config["accept_only_homepage"],config["hostname"],
34
+ config["drop_refers_by_hostname"], config["use_local_time"],
35
+ config["assigned_numbers"], config["match_page_numbers"])
36
+
37
+ syslog = Logger.new('./log/system.log',shift_age = 'weekly')
38
+ case config["log_level"]
39
+ when "info"
40
+ syslog.level = Logger::INFO
41
+ when "warn"
42
+ syslog.level = Logger::WARN
43
+ when "error"
44
+ syslog.level = Logger::ERROR
45
+ when "fatal"
46
+ syslog.level = Logger::FATAL
47
+ else
48
+ syslog.level = Logger::DEBUG
49
+ end
50
+ syslog.info "starting ... >>> "+Time.now.to_s
51
+
52
+ # Open log file for reading
53
+ File.open('./data/exlog.log', 'r') do |file|
54
+ vlist = Vizi::VisitList.new
55
+ rec_count = 0
56
+ hit_count = 0
57
+ max_rec_count = 99999
58
+ max_rec_count = config["max_rec_count"] if config["max_rec_count"]
59
+ visit_count = 0
60
+ page_count = 0
61
+ human_count = 0
62
+ drop_count = 0
63
+ spider_count = 0
64
+ start_time = Time.now
65
+ logformat = nil
66
+ # Begin to parse each record
67
+ while(line = file.gets)
68
+ parsed_data = parser.parse_line(line, logformat)
69
+ logformat = parsed_data[:p_logformat]
70
+ rec_count = rec_count + 1
71
+ next if parsed_data[:p_linetype] != "V"
72
+ hit_count = hit_count + 1
73
+ page_count = page_count + 1 if parsed_data[:p_pageflag]
74
+ @visit=vlist.find_by_ip(parsed_data[:ip])
75
+ if @visit.nil?
76
+ vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
77
+ parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase],parsed_data[:p_pageid]))
78
+ @visit=vlist.find_by_ip(parsed_data[:ip])
79
+ visit_count = visit_count + 1
80
+ else
81
+ @visit.update(parsed_data[:datetime],parsed_data[:csuriquery],parsed_data[:timetaken],
82
+ parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase], parsed_data[:p_pageid])
83
+ end
84
+ @visits = vlist.find_expired(@visit.start_dt)
85
+ if @visits
86
+ @visits.sendoutput
87
+ vlist.delete(@visits)
88
+ human_count = human_count + 1 if @visits.visitortype == "H"
89
+ drop_count = drop_count + 1 if @visits.visitortype == "D"
90
+ spider_count = spider_count + 1 if @visits.visitortype == "S"
91
+ end
92
+ break if rec_count == max_rec_count
93
+ end
94
+ @visits = vlist.find_all
95
+ @visits.each {|v|
96
+ v.sendoutput
97
+ human_count = human_count + 1 if v.visitortype == "H"
98
+ drop_count = drop_count + 1 if v.visitortype == "D"
99
+ spider_count = spider_count + 1 if v.visitortype == "S"
100
+ }
101
+ if config["summary_flag"]
102
+ syslog.info "Record count is "+rec_count.to_s
103
+ syslog.info "Hit count is "+hit_count.to_s
104
+ syslog.info "Page count is "+page_count.to_s
105
+ syslog.info "Total visit count is "+visit_count.to_s
106
+ syslog.info "Human visit count is "+human_count.to_s
107
+ syslog.info "Drop visit count is "+drop_count.to_s
108
+ syslog.info "Spider visit count is "+spider_count.to_s
109
+ syslog.info "Batch processing time "+(Time.now-start_time).to_s
110
+ end
111
+ syslog.info "ending ... >>> "+Time.now.to_s
112
+ end
113
+
114
+ == License
115
+
116
+ This code is made available under the MIT license. It is based on original parser code from Jan Wikholm.
data/Rakefile ADDED
@@ -0,0 +1,44 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'rake/rdoctask'
4
+ require 'rake/testtask'
5
+
6
+ spec = Gem::Specification.new do |s|
7
+ s.name = "vizi_tracker"
8
+ s.version = "0.1.0"
9
+ s.author = "Al Kivi"
10
+ s.email = "al.kivi at vizitrax.com"
11
+ s.homepage = "http://github.com/al-kivi/vizi_tracker"
12
+ s.description = "Univeral web log file parser and visit analyzer"
13
+ s.summary = "A package for parsing web server logs and creating visit records"
14
+
15
+ s.platform = Gem::Platform::RUBY
16
+ s.has_rdoc = true
17
+ s.extra_rdoc_files = ["README.rdoc"]
18
+
19
+ s.require_path = "lib"
20
+ s.files = %w(README.rdoc Rakefile) + Dir.glob("lib/**/*")
21
+ end
22
+
23
+ Rake::GemPackageTask.new(spec) do |pkg|
24
+ pkg.need_tar = true
25
+ end
26
+
27
+ Rake::RDocTask.new(:rdoc) do |rdoc|
28
+ rdoc.rdoc_dir = 'rdoc'
29
+ rdoc.title = 'ViziTracker'
30
+ rdoc.options << '--line-numbers' << '--inline-source'
31
+ rdoc.rdoc_files.include('README')
32
+ rdoc.rdoc_files.include('lib/**/*.rb')
33
+ end
34
+
35
+ Rake::TestTask.new do |t|
36
+ t.libs << 'test'
37
+ t.test_files = FileList["test/**/*_test.rb"]
38
+ t.verbose = true
39
+ end
40
+
41
+ task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
42
+ puts "generated latest version"
43
+ end
44
+
data/config/logger-backup.yml ADDED
@@ -0,0 +1,49 @@
1
+ # Configuration data for logger
2
+ visit_timeout: 1200
3
+ summary_flag: true
4
+ convert_to_lower_case: true
5
+ #max_rec_count: 100
6
+ homepage: home.aspx
7
+ accept_only_homepage: true
8
+ page_extensions:
9
+ - aspx
10
+ - asp
11
+ hostname: www.sigma-systems.com
12
+ drop_refers_by_hostname: true
13
+ drop_ips:
14
+ - 76.12.185.100
15
+ spider_ips:
16
+ - 66.98.254.55
17
+ - 64.208.168.252
18
+ - 64.235.108.183
19
+ - 76.2.144.115
20
+ - 66.98.254.236
21
+ - 202.108.22.132
22
+ - 89.122.29.77
23
+ - 95.174.93.222
24
+ - 66.55.37.179
25
+ - 198.45.18.20
26
+ - 38.104.227.3
27
+ spider_names:
28
+ - bot
29
+ - spider
30
+ - slurp
31
+ - root.exe
32
+ - .dll
33
+ - slurp
34
+ - looksmart
35
+ - nutchsvc
36
+ - iconsurf
37
+ - objectsearch
38
+ - openfind
39
+ - iltovatore
40
+ - mozdex
41
+ - netresearch
42
+ - konsqueror
43
+ - crawler
44
+ - searchme
45
+ - java/1.6.0_04
46
+ - scoutjet
47
+ - yeti
48
+ - yandex
49
+ drop_spiders: true
data/config/logger.yml ADDED
@@ -0,0 +1,69 @@
1
+ # Configuration data for logger
2
+ #max_rec_count: 100
3
+ visit_timeout: 1200
4
+ summary_flag: true
5
+ convert_to_lower_case: true
6
+ log_level: warn
7
+ #log_level options are debug, info, warn, error, fatal
8
+ homepage: /home.aspx
9
+ #homepage: /
10
+ accept_only_homepage: true
11
+ page_urls:
12
+ - aspx
13
+ - asp
14
+ hide_urls:
15
+ - css
16
+ - js
17
+ - gif
18
+ - swf
19
+ - ico
20
+ - chart
21
+ - robots
22
+ hostname: www.sigma-systems.com
23
+ drop_refers_by_hostname: true
24
+ use_local_time: true
25
+ download_page_number: 45
26
+ drop_ips:
27
+ - 76.12.185.100
28
+ spider_ips:
29
+ - 66.98.254.55
30
+ - 64.208.168.252
31
+ - 64.235.108.183
32
+ - 76.2.144.115
33
+ - 66.98.254.236
34
+ - 202.108.22.132
35
+ - 89.122.29.77
36
+ - 95.174.93.222
37
+ - 66.55.37.179
38
+ - 198.45.18.20
39
+ - 38.104.227.3
40
+ spider_names:
41
+ - bot
42
+ - spider
43
+ - slurp
44
+ - root.exe
45
+ - .dll
46
+ - slurp
47
+ - looksmart
48
+ - nutchsvc
49
+ - iconsurf
50
+ - objectsearch
51
+ - openfind
52
+ - iltovatore
53
+ - mozdex
54
+ - netresearch
55
+ - konsqueror
56
+ - crawler
57
+ - searchme
58
+ - java/1.6.0_04
59
+ - scoutjet
60
+ - yeti
61
+ - yandex
62
+ # convert urls to assigned numbers where numbers cannot be parsed from url
63
+ assigned_numbers:
64
+ - /visit/index,1
65
+ - /visit/show/,2
66
+ - /visit/vcardedit/,3
67
+ - /visit/showmap_na,4
68
+ - /visit/showmap_row,5
69
+ match_page_numbers: false
data/config/logger_apache.yml ADDED
@@ -0,0 +1,65 @@
1
+ # Configuration data for logger
2
+ #max_rec_count: 100
3
+ visit_timeout: 1200
4
+ summary_flag: true
5
+ convert_to_lower_case: true
6
+ #homepage: /home.aspx
7
+ homepage: /
8
+ accept_only_homepage: true
9
+ page_urls:
10
+ - aspx
11
+ - asp
12
+ hide_urls:
13
+ - css
14
+ - js
15
+ - gif
16
+ - swf
17
+ - ico
18
+ - chart
19
+ - robots
20
+ hostname: www.sigma-systems.com
21
+ drop_refers_by_hostname: true
22
+ getlocal_time: true
23
+ drop_ips:
24
+ - 76.12.185.100
25
+ spider_ips:
26
+ - 66.98.254.55
27
+ - 64.208.168.252
28
+ - 64.235.108.183
29
+ - 76.2.144.115
30
+ - 66.98.254.236
31
+ - 202.108.22.132
32
+ - 89.122.29.77
33
+ - 95.174.93.222
34
+ - 66.55.37.179
35
+ - 198.45.18.20
36
+ - 38.104.227.3
37
+ spider_names:
38
+ - bot
39
+ - spider
40
+ - slurp
41
+ - root.exe
42
+ - .dll
43
+ - slurp
44
+ - looksmart
45
+ - nutchsvc
46
+ - iconsurf
47
+ - objectsearch
48
+ - openfind
49
+ - iltovatore
50
+ - mozdex
51
+ - netresearch
52
+ - konsqueror
53
+ - crawler
54
+ - searchme
55
+ - java/1.6.0_04
56
+ - scoutjet
57
+ - yeti
58
+ - yandex
59
+ page_numbers:
60
+ - /visit/index,1
61
+ - /visit/show/,2
62
+ - /visit/vcardedit/,3
63
+ - /visit/showmap_na,4
64
+ - /visit/showmap_row,5
65
+ match_page_numbers: true
data/config/logger_sample.yml ADDED
@@ -0,0 +1,69 @@
1
+ # Configuration data for logger
2
+ #max_rec_count: 100
3
+ visit_timeout: 1200
4
+ summary_flag: true
5
+ convert_to_lower_case: true
6
+ log_level: warn
7
+ #log_level options are debug, info, warn, error, fatal
8
+ homepage: /home.aspx
9
+ #homepage: /
10
+ accept_only_homepage: true
11
+ page_urls:
12
+ - aspx
13
+ - asp
14
+ hide_urls:
15
+ - css
16
+ - js
17
+ - gif
18
+ - swf
19
+ - ico
20
+ - chart
21
+ - robots
22
+ hostname: www.sigma-systems.com
23
+ drop_refers_by_hostname: true
24
+ use_local_time: true
25
+ download_page_number: 45
26
+ drop_ips:
27
+ - 76.12.185.100
28
+ spider_ips:
29
+ - 66.98.254.55
30
+ - 64.208.168.252
31
+ - 64.235.108.183
32
+ - 76.2.144.115
33
+ - 66.98.254.236
34
+ - 202.108.22.132
35
+ - 89.122.29.77
36
+ - 95.174.93.222
37
+ - 66.55.37.179
38
+ - 198.45.18.20
39
+ - 38.104.227.3
40
+ spider_names:
41
+ - bot
42
+ - spider
43
+ - slurp
44
+ - root.exe
45
+ - .dll
46
+ - slurp
47
+ - looksmart
48
+ - nutchsvc
49
+ - iconsurf
50
+ - objectsearch
51
+ - openfind
52
+ - iltovatore
53
+ - mozdex
54
+ - netresearch
55
+ - konsqueror
56
+ - crawler
57
+ - searchme
58
+ - java/1.6.0_04
59
+ - scoutjet
60
+ - yeti
61
+ - yandex
62
+ # convert urls to assigned numbers where numbers cannot be parsed from url
63
+ assigned_numbers:
64
+ - /visit/index,1
65
+ - /visit/show/,2
66
+ - /visit/vcardedit/,3
67
+ - /visit/showmap_na,4
68
+ - /visit/showmap_row,5
69
+ match_page_numbers: false