vizi_tracker 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +116 -0
- data/Rakefile +44 -0
- data/config/logger-backup.yml +49 -0
- data/config/logger.yml +69 -0
- data/config/logger_apache.yml +65 -0
- data/config/logger_sample.yml +69 -0
- data/data/exlog.log +5458 -0
- data/data/sample-alter.log +11870 -0
- data/data/sample-surf.log +47 -0
- data/data/sample-wle.log +30474 -0
- data/data/testlog.log +270 -0
- data/data/vizitrax.log +17951 -0
- data/doc/Object.html +200 -0
- data/doc/ParserTest.html +268 -0
- data/doc/README_rdoc.html +128 -0
- data/doc/Rakefile.html +148 -0
- data/doc/Visit.html +487 -0
- data/doc/VisitList.html +385 -0
- data/doc/Vizi/LogFormat.html +377 -0
- data/doc/Vizi/LogParser.html +551 -0
- data/doc/Vizi/Visit.html +487 -0
- data/doc/Vizi/VisitList.html +386 -0
- data/doc/Vizi.html +168 -0
- data/doc/ViziLogFormat.html +382 -0
- data/doc/ViziLogParser.html +551 -0
- data/doc/created.rid +8 -0
- data/doc/formats/apache-custom-log.pdf +0 -0
- data/doc/formats/apache.rtf +238 -0
- data/doc/formats/format-descriptions.xls +0 -0
- data/doc/formats/w3c-extended.pdf +0 -0
- data/doc/formats/w3c-extended.rtf +135 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +112 -0
- data/doc/js/darkfish.js +116 -0
- data/doc/js/jquery.js +32 -0
- data/doc/js/quicksearch.js +114 -0
- data/doc/js/thickbox-compressed.js +10 -0
- data/doc/lib/vizi/parser_rb.html +63 -0
- data/doc/lib/vizi/vizi_tracker_rb.html +63 -0
- data/doc/lib/vizi_log_parser_rb.html +56 -0
- data/doc/lib/vizi_tracker_rb.html +56 -0
- data/doc/rdoc.css +759 -0
- data/doc/test/parser_test_rb.html +54 -0
- data/doc/test/test_helper_rb.html +56 -0
- data/doc/testit_rb.html +63 -0
- data/lib/vizi/vizi_tracker.rb +406 -0
- data/lib/vizi_tracker.rb +5 -0
- data/log/parse.log +79 -0
- data/log/system.log +66 -0
- data/test/parser_test.rb +48 -0
- data/test/test_helper.rb +3 -0
- data/testit.rb +105 -0
- data/vizi_tracker.gemspec +21 -0
- metadata +146 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=IBM437" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: parser_test.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>uri</li>
|
35
|
+
|
36
|
+
</ul>
|
37
|
+
</dd>
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
</dl>
|
42
|
+
</div>
|
43
|
+
|
44
|
+
<div id="documentation">
|
45
|
+
|
46
|
+
<div class="description">
|
47
|
+
<h2>Description</h2>
|
48
|
+
|
49
|
+
</div>
|
50
|
+
|
51
|
+
</div>
|
52
|
+
</body>
|
53
|
+
</html>
|
54
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=IBM437" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: test_helper.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>test/unit</li>
|
35
|
+
|
36
|
+
<li>ruby-debug</li>
|
37
|
+
|
38
|
+
</ul>
|
39
|
+
</dd>
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
</dl>
|
44
|
+
</div>
|
45
|
+
|
46
|
+
<div id="documentation">
|
47
|
+
|
48
|
+
<div class="description">
|
49
|
+
<h2>Description</h2>
|
50
|
+
|
51
|
+
</div>
|
52
|
+
|
53
|
+
</div>
|
54
|
+
</body>
|
55
|
+
</html>
|
56
|
+
|
data/doc/testit_rb.html
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=IBM437" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: testit.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="./js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="./js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="./js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="./js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-05-21 12:58:06 -0400</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>c:\rails\vizi_tracker\lib\vizi_tracker</li>
|
35
|
+
|
36
|
+
<li>time</li>
|
37
|
+
|
38
|
+
<li>yaml</li>
|
39
|
+
|
40
|
+
<li>logger</li>
|
41
|
+
|
42
|
+
</ul>
|
43
|
+
</dd>
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
</dl>
|
48
|
+
</div>
|
49
|
+
|
50
|
+
<div id="documentation">
|
51
|
+
|
52
|
+
<div class="description">
|
53
|
+
<h2>Description</h2>
|
54
|
+
|
55
|
+
<p>This is a sample application that uses the Vizitracker gem classes Modified
|
56
|
+
by ...</p>
|
57
|
+
|
58
|
+
</div>
|
59
|
+
|
60
|
+
</div>
|
61
|
+
</body>
|
62
|
+
</html>
|
63
|
+
|
@@ -0,0 +1,406 @@
|
|
1
|
+
# This gem module provides a set of classes to support the parsing of web log files and
|
2
|
+
# the creation of Visit records from the individual parsed web log records
|
3
|
+
#
|
4
|
+
# The LogFormat and LogParser classes were derived in part from an Apache logger application
|
5
|
+
# developed by Jan Wikholm. These two classes were extended to support both Apache and IIS
|
6
|
+
# web logs. The details from the web logs are assembled to compose Visit objects and Visit
|
7
|
+
# history detail.
|
8
|
+
#
|
9
|
+
# Author:: Al Kivi <al.kivi@vizitrax.com>
|
10
|
+
# License:: MIT
|
11
|
+
|
12
|
+
module Vizi
|
13
|
+
# This class describes one log format: it turns a format string into field symbols and a line-matching regex
|
14
|
+
class LogFormat
  attr_reader :name, :format, :format_symbols, :format_regex

  # format string char => [:symbol to use, /regex to use when matching against log/]
  DIRECTIVES = {
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],   # apache and IIS: called c-ip in IIS
    'p' => [:sip, /\d+\.\d+\.\d+\.\d+/],  # IIS:
    'g' => [:auth, /\S*/],                # apache:
    'u' => [:username, /\S*/],            # apache and IIS: called cs-username in IIS
    't' => [:dtstring, /\[.*?\]/],        # apache: one field with date and time
    'd' => [:datestring, /\d+\-\d+\-\d+/], # IIS:
    'e' => [:timestring, /\d+\:\d+\:\d+/], # IIS:
    'r' => [:request, /.*?/],             # apache: includes both csmethod and csuristem
    'm' => [:csmethod, /\w*?/],           # IIS:
    'w' => [:csuristem, /\S*/],           # IIS:
    's' => [:status, /\d+/],              # apache and IIS: is called sc_status in IIS
    'b' => [:bytecount, /-|\d+/],         # apache and IIS: is called cs_bytes in IIS
    'v' => [:domain, /.*?/],              # apache and IIS: is c-computername in IIS
    'i' => [:header_lines, /.*?/],        # apache: transforms to useragent or referer or cookies
    'a' => [:useragent, /\S*/],           # IIS:
    'j' => [:referer, /\S*/],             # IIS:
    'k' => [:cscookie, /\d+/],            # IIS:
    'q' => [:csuriquery, /.*/],           # IIS:
    'y' => [:csbytes, /\d+/],             # IIS: (digits; the previous /d+/ matched the letter "d")
    'o' => [:sport, /\d+/],               # IIS:
    'x' => [:scsubstatus, /\d+/],         # IIS:
    'z' => [:cshost, /\d+/],              # IIS:
    'l' => [:win32status, /\d+/],         # IIS:
    'n' => [:timetaken, /\d+/],           # IIS:
    'c' => [:comment, /^#/],              # IIS: comment line identifier
    'f' => [:fields, /^#Fields:/]         # IIS: field line identifier
  }

  # Matches one directive of a format string. Captures, in order:
  # condition (e.g. ">"), sub-directive (e.g. "{Referer}"), the directive
  # character, and the trailing separator (whitespace, backslashes, quotes).
  # The character class holds exactly the DIRECTIVES keys, so a matched
  # directive character always has an entry in DIRECTIVES.
  FORMAT_DIRECTIVE = /%(.*?)(\{.*?\})?([#{Regexp.escape(DIRECTIVES.keys.join)}])([\s\\"]*)/

  # This method initializes the LogFormat object with fieldnames and log formats
  #
  # name::   identifier of the format (e.g. :common)
  # format:: format string made of %-directives (see DIRECTIVES)
  def initialize(name, format)
    @name, @format = name, format
    parse_format(format)
  end

  # The symbols are used to map the log to the env variables
  # The regex is used when checking what format the log is and to extract data
  #
  # Sets @format_symbols (one symbol per directive, in order) and
  # @format_regex (one capture group per directive, anchored at line start).
  def parse_format(format)
    symbols = []
    pattern = ""
    format.scan(FORMAT_DIRECTIVE) do |condition, subdirective, directive_char, ignored|
      symbol, match_regex = process_directive(directive_char, subdirective, condition)
      # Whitespace between directives becomes \s in the regex; the other
      # separator characters (\ and ") are copied into the pattern as they are.
      separator = ignored.to_s.gsub(/\s/, '\\s')
      symbols << symbol
      pattern << "(#{match_regex})#{separator}"
    end
    @format_symbols = symbols
    @format_regex = /^#{pattern}/
  end

  # Returns [symbol, regex source] for one directive.
  #
  # For the 'i' (header) directive the symbol is derived from the
  # sub-directive, e.g. "{User-agent}" => :user_agent. When no sub-directive
  # is given, the generic :header_lines symbol is used instead of failing.
  # The condition argument is accepted but not used.
  def process_directive(directive_char, subdirective, condition)
    symbol, regex = DIRECTIVES[directive_char]
    if directive_char == 'i' && subdirective
      symbol = subdirective[1...-1].downcase.tr('-', '_').to_sym
    end
    [symbol, regex.source]
  end
end
|
80
|
+
|
81
|
+
# This class handles the parsing of each line in the log file
|
82
|
+
class LogParser
  require 'time'
  require 'logger' # Logger is used in #initialize; do not rely on the caller having loaded it

  LOG_FORMATS = {
    :common => '%h %g %u %t \"%r\" %>s %b',
    :common_with_virtual => '%v %h %g %u %t \"%r\" %>s %b',
    :combined => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_virtual => '%v %h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_cookies => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"',
    :w3c_c => '%c', # format is comment ... still looking
    :w3c_f => '%f'  # format is IIS fields ... ready to parse
  }

  # IIS "#Fields:" name => LogFormat directive character
  FIELDNAMES = {
    'c-ip' => 'h',
    's-ip' => 'p',
    'cs-username' => 'u',
    'sc-status' => 's',
    'cs-bytes' => 'y',
    'sc-bytes' => 'b',
    'date' => 'd',
    'time' => 'e',
    'cs-method' => 'm',
    'cs-uri-stem' => 'w',
    'cs-uri-query' => 'q',
    'cs(Referer)' => 'j',    # internal shortened to referer
    'cs(User-Agent)' => 'a', # internal shortened to useragent
    'cs(Cookie)' => 'k',     # internal shortened to cscookie
    's-port' => 'o',
    'cs-host' => 'z',
    'sc-substatus' => 'x',
    'sc-win32-status' => 'l',
    'time-taken' => 'n',
    's-computername' => 'v'
  }

  # Extracts the value of a "p=" / "q=" parameter from a ".../search?..."
  # referer. The value ends at the next "&" or at the end of the referer,
  # so a phrase that is the last query parameter is found as well.
  SEARCH_PHRASE = /(search\?\S*?[pq])=(\S*?)(&|$)/

  # Location of the parser's own warning log (relative to the working directory)
  PARSE_LOG_PATH = './log/parse.log'

  attr_reader :known_formats

  # This method initialises LogParser object and loads the configurable logger control items
  #
  # The arguments are stored as they are and consulted by #parse_line:
  # drop_ips / spider_ips:: looked up with #index against the visitor ip
  # spider_names::          fragments matched against the user agent
  # page_urls / hide_urls:: fragments matched against the uri stem
  # homepage, accept_only_homepage:: when the flag is set, only uri stems
  #                         starting with homepage count as pages
  # hostname, drop_refers_by_hostname:: when the flag is set, referers
  #                         containing hostname mark the visitor as dropped
  # use_local_time::        convert parsed timestamps with Time#getlocal
  # assigned_numbers, match_page_numbers:: "url,number" entries used to
  #                         assign a page id to page hits
  #
  # Opens ./log/parse.log (weekly rotation, WARN level); the log directory
  # is created when it does not exist yet.
  def initialize(drop_ips, spider_ips, spider_names, page_urls, hide_urls, homepage, accept_only_homepage,
                 hostname, drop_refers_by_hostname, use_local_time, assigned_numbers, match_page_numbers)
    @drops = drop_ips
    @sips = spider_ips
    @snames = spider_names
    @page_urls = page_urls
    @hide_urls = hide_urls
    @homepage = homepage
    @accept_only_homepage = accept_only_homepage
    @hostname = hostname
    @drop_refers_by_hostname = drop_refers_by_hostname
    @use_local_time = use_local_time
    @assigned_numbers = assigned_numbers
    @match_page_numbers = match_page_numbers
    @log_format = []
    initialize_known_formats
    log_dir = File.dirname(PARSE_LOG_PATH)
    Dir.mkdir(log_dir) unless File.directory?(log_dir) # Logger.new fails on a missing directory
    @parselog = Logger.new(PARSE_LOG_PATH, 'weekly')
    @parselog.level = Logger::WARN
  end

  # Processes the format string into symbols and test regex and saves using LogFormat class
  def initialize_known_formats
    @known_formats = {}
    LOG_FORMATS.each do |name, format|
      @known_formats[name] = Vizi::LogFormat.new(name, format)
    end
  end

  # Checks which standard the log file (well one line) is
  # Automatically checks for most complex (longest) regex first ...
  #
  # Returns the key of the first matching known format, or :unknown.
  def check_format(line)
    by_complexity = @known_formats.sort_by { |key, log_format| log_format.format_regex.source.size }.reverse
    by_complexity.each do |key, log_format|
      return key if line.match(log_format.format_regex)
    end
    :unknown
  end

  # Builds the format from the IIS fieldnames
  #
  # line is an IIS "#Fields: ..." line; its first token is skipped and every
  # following field name is translated through FIELDNAMES into "%<char> ".
  # The result is stored in @format and returned.
  #
  # Raises ArgumentError for a field name that is not in FIELDNAMES. The name
  # cannot simply be skipped, because #parse_line maps columns by position.
  def build_format(line)
    names = line.split(' ')[1..-1] || []
    @format = names.map { |fieldname|
      directive = FIELDNAMES[fieldname]
      raise ArgumentError, "unsupported IIS field name: #{fieldname}" if directive.nil?
      "%#{directive} "
    }.join
  end

  # Match a partial string in field against an external field array
  #
  # Returns the position in field of the first fldarray entry found inside
  # it, or nil when nothing matches or when either argument is nil.
  def match_partial(field, fldarray)
    return nil if field.nil? or fldarray.nil?
    Array(fldarray).each do |fragment|
      hit = field.index(fragment)
      return hit if hit
    end
    nil
  end

  # Find an assigned number from matching string against an external field array
  #
  # Each fldarray entry is split on ","; the first entry that contains field
  # supplies the number from its second part. Returns 0 when nothing matches
  # or when either argument is nil.
  def find_assigned_number(field, fldarray)
    return 0 if field.nil? or fldarray.nil?
    entry = Array(fldarray).find { |candidate| candidate.index(field) }
    entry ? entry.split(',')[1].to_i : 0
  end

  # apache files ... regex the file to determine logformat name
  # IIS files ... parse the fields string to determine the file contents
  #
  # line::      one raw log line
  # logformat:: a LogFormat-like object (responds to format_symbols) built
  #             from an earlier IIS "#Fields:" line, or nil to auto-detect
  #
  # Returns a Hash of the parsed fields plus the derived :p_* entries
  # (:p_logformatname, :p_logformat, :p_visitortype, :p_linetype and, for
  # visitor lines, :p_pageflag / :p_searchphrase / :p_pageid).
  # Raises ArgumentError when no known format matches the line.
  def parse_line(line, logformat)
    if logformat != nil
      log_format = logformat # get log_format string
      @format_name = "temp"
      data = line.split(' ') # IIS columns are split by position, not by regex
    else
      @format_name = check_format(line) # look for matching formats, check each time
      log_format = @known_formats[@format_name] # found a matched format
      raise ArgumentError, "unrecognized log line format" if log_format.nil? or line !~ log_format.format_regex
      data = line.downcase.scan(log_format.format_regex).flatten
    end

    parsed_data = {}
    log_format.format_symbols.each_with_index do |symbol, i|
      parsed_data[symbol] = data[i] # load data for each format_symbol
    end

    parsed_data[:p_logformatname] = @format_name.to_s
    parsed_data[:p_logformat] = logformat
    parsed_data[:p_visitortype] = "H" # set default visitor type (H)uman
    # linetype is (V)isitors, or (C)omment when the line carries no ip
    parsed_data[:p_linetype] = parsed_data[:ip].nil? ? "C" : "V"

    if @format_name.to_s == "w3c_f" # IIS file name ... generic
      @format = build_format(line) # parse fields to get log_format
      temp_format = Vizi::LogFormat.new(:temp, @format) # create temp format
      parsed_data[:p_logformat] = temp_format # shuttle the log_format object
      parsed_data[:p_logformatname] = "iis" # change the name to iis
      parsed_data[:p_linetype] = "F" # linetype to (F)ield list
      parsed_data[:p_visitortype] = "-" # visitor type not relevant
    elsif @format_name.to_s == "w3c_c" # found IIS file in comments section
      parsed_data[:p_linetype] = "C" # linetype is (C)omment
      parsed_data[:p_visitortype] = "-"
    elsif parsed_data[:p_linetype] == "C"
      @parselog.warn line
      @parselog.warn "Found comment lines embedded in the log file ... resetting to nil"
      parsed_data[:p_logformat] = nil
    else # parsing the field names
      if parsed_data[:datestring]
        dt = Time.parse("#{parsed_data[:datestring]} #{parsed_data[:timestring]}")
        parsed_data[:datetime] = to_log_time(dt)
      end

      if parsed_data[:dtstring]
        parsed_data[:dtstring] = parsed_data[:dtstring][1...-1] # strip the [ ] brackets
        parsed_data[:dtstring] = parsed_data[:dtstring].sub(":", " ") # "date:time" => "date time"
        dt = Time.parse(parsed_data[:dtstring])
        parsed_data[:datetime] = to_log_time(dt)
      end

      if parsed_data[:request]
        # request is "<method> <uri> <protocol>"; the uri is the second token
        parsed_data[:csuristem] = parsed_data[:request].split(' ')[1]
      end

      # Now classify visitortype based on logger yml rules ...
      uristem = parsed_data[:csuristem]

      parsed_data[:p_pageflag] = false
      if @accept_only_homepage
        if uristem and @homepage and uristem.downcase.index(@homepage) == 0
          parsed_data[:p_pageflag] = true
        end
      else
        parsed_data[:p_pageflag] = true if match_partial(uristem, @page_urls)
      end
      parsed_data[:p_pageflag] = false if @hide_urls and match_partial(uristem, @hide_urls)

      parsed_data[:p_visitortype] = "D" if @drops and @drops.index(parsed_data[:ip])
      parsed_data[:p_visitortype] = "S" if @sips and @sips.index(parsed_data[:ip])

      # IIS formats store the agent as :useragent, the apache "%{User-agent}i"
      # directive stores it as :user_agent; accept either one.
      useragent = parsed_data[:useragent] || parsed_data[:user_agent]
      if useragent and @snames and match_partial(useragent, @snames)
        parsed_data[:p_visitortype] = "S"
      end

      if parsed_data[:referer]
        phrase = SEARCH_PHRASE.match(parsed_data[:referer])
        parsed_data[:p_searchphrase] = phrase[2] if phrase
        if @drop_refers_by_hostname and @hostname
          parsed_data[:p_visitortype] = "D" if parsed_data[:referer].index(@hostname)
        end
      end

      if @match_page_numbers and parsed_data[:p_pageflag]
        parsed_data[:p_pageid] = find_assigned_number(uristem, @assigned_numbers)
      end
    end
    parsed_data
  end

  private

  # Rebuilds dt's wall-clock fields as a GMT time and converts it with
  # Time#getlocal when @use_local_time is set.
  # NOTE(review): any UTC offset present in the log timestamp is dropped
  # here (the clock reading is reused as GMT) - confirm that this is intended.
  def to_log_time(dt)
    stamp = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
    @use_local_time ? stamp.getlocal : stamp
  end
end
|
295
|
+
|
296
|
+
# This class creates and stores information related to each visit
|
297
|
+
# Visits are determined on the basis of the IP Address hits during a timed interval
|
298
|
+
#
|
299
|
+
class Visit
  attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pages, :robots, :visitortype, :searchphrase
  attr_reader :rank, :pageids, :download_file

  # Class-level settings read by #initialize and #add_fields. They are only
  # given a value here when nothing has defined them yet, so a value set
  # elsewhere keeps precedence.
  # NOTE(review): 1800 seconds is an assumed default visit timeout - confirm.
  @@visit_timeout = 1800 unless class_variable_defined?(:@@visit_timeout)
  # Page id whose hits are inspected for a downloaded file name (nil = none)
  @@download_page_number = nil unless class_variable_defined?(:@@download_page_number)

  # This method calculates the rank
  #
  # pages and duration (seconds) are each capped before being combined.
  # With integer arguments the result is 1..5 for human visitors, the
  # negated value for visitortype "S" and 0 for visitortype "D".
  def calculate_rank(pages, duration, visitortype)
    ranktotal = [pages, 9].min * 10 + [duration / 60, 9].min
    rank = ((ranktotal + 10) / 20).round
    rank = 1 if rank == 0
    rank = -rank if visitortype == "S"
    rank = 0 if visitortype == "D"
    rank
  end

  # This method extracts the name of a downloaded file from the csuriquery value
  #
  # Only requests with timetaken above 4000 are considered. Returns the text
  # after "file=" in csuriquery, or nil. The extracted value is also printed.
  def get_download(csuriquery, timetaken)
    download = nil
    if timetaken.to_i > 4000
      download = csuriquery.to_s.split("file=")[1]
      p download
    end
    download
  end

  # The method completes the initialization and update methods
  #
  # Stores the search phrase when one is given, recomputes @rank and appends
  # the page id of this hit to @pageids. When p_pageid is not supplied, the
  # id is taken from a "PageID=<n>" parameter in csuriquery, and a hit on
  # the configured download page additionally records the downloaded file.
  def add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
    @searchphrase = p_searchphrase if p_searchphrase
    @rank = calculate_rank(@pages, @duration, @visitortype)
    @pageids ||= [] # keep the ids collected by earlier hits of this visit
    if p_pageid
      @pageids << p_pageid
    else
      z = (/(PageID)=(\d+)/).match(csuriquery)
      if z
        p_pageid = z[2].to_i
        @pageids << p_pageid
        @download_file = get_download(csuriquery, timetaken) if p_pageid == @@download_page_number
      end
    end
  end

  # This method initializes the Visit object. Load object with parsed data
  #
  # The visit expires @@visit_timeout seconds after log_dt. A request for
  # "/robots.txt" marks the visitor as a spider ("S").
  # NOTE(review): @hits starts at 0 while @pages starts at 1 for a page hit -
  # confirm whether the first hit is meant to be counted.
  def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @ip = ip
    @start_dt = log_dt
    @expire_dt = @start_dt + @@visit_timeout
    @end_dt = @start_dt
    @duration = 0
    @hits = 0
    @pages = p_pageflag ? 1 : 0
    @visitortype = p_visitortype
    @visitortype = "S" if csuristem == "/robots.txt"
    @searchphrase = ""
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # This method updates the Visit object with new parsed data
  #
  # Extends the visit to end_dt, counts the hit (and the page, when
  # p_pageflag is set) and replaces the visitor type only while it is
  # still "H".
  def update(end_dt, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @end_dt = end_dt
    @duration = (@end_dt - @start_dt).to_i
    @hits = @hits + 1
    @pages = @pages + 1 if p_pageflag
    @visitortype = p_visitortype if @visitortype == "H"
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # Prints a one-line summary of the visit, followed by the search phrase
  # and the collected page ids when present.
  def sendoutput
    # The ip is padded to a fixed 15-character column.
    summary = ">" + @ip.to_s.ljust(15)[0..14] + " " + @start_dt.to_s[0..18] + " " + @visitortype.to_s +
              " Hits> " + @hits.to_s + " Pgs> " + @pages.to_s + " Dur> " + @duration.to_s + " Rank> " + @rank.to_s
    p summary
    p " Phrase> " + @searchphrase if @searchphrase.length > 0
    p @pageids if @pageids.length > 0
  end
end
|
375
|
+
|
376
|
+
# This class creates and manages a list to keep track of the visits that are in process (cached)
|
377
|
+
# Once a visit reaches the time interval, an output transaction is generated and the visit is removed from the list
|
378
|
+
#
|
379
|
+
class VisitList
  # Starts with an empty list of in-process visits.
  def initialize
    @visits = []
  end

  # Adds a visit to the end of the list; returns the list itself so that
  # calls can be chained.
  def append(visit)
    @visits << visit
    self
  end

  # Removes the given visit from the list (Array#delete semantics).
  def delete(visit)
    @visits.delete(visit)
  end

  # Returns the underlying array of visits.
  def find_all
    @visits
  end

  # Returns the first visit recorded for ip, or nil.
  def find_by_ip(ip)
    @visits.detect { |candidate| ip == candidate.ip }
  end

  # Returns the first visit whose expire_dt lies before test_dt, or nil.
  def find_expired(test_dt)
    @visits.detect { |candidate| candidate.expire_dt < test_dt }
  end
end
|
405
|
+
|
406
|
+
end
|
data/lib/vizi_tracker.rb
ADDED
data/log/parse.log
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
|
2
|
+
W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
3
|
+
|
4
|
+
W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
5
|
+
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
6
|
+
|
7
|
+
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
8
|
+
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
9
|
+
|
10
|
+
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
11
|
+
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
12
|
+
|
13
|
+
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
14
|
+
W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
|
15
|
+
|
16
|
+
W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
17
|
+
W, [2011-06-05T22:08:53.528629 #560] WARN -- :
|
18
|
+
|
19
|
+
W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
20
|
+
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
21
|
+
|
22
|
+
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
23
|
+
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
24
|
+
|
25
|
+
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
26
|
+
W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
27
|
+
|
28
|
+
W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
29
|
+
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
30
|
+
|
31
|
+
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
32
|
+
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
33
|
+
|
34
|
+
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
35
|
+
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
36
|
+
|
37
|
+
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
38
|
+
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
39
|
+
|
40
|
+
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
41
|
+
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
42
|
+
|
43
|
+
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
44
|
+
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
45
|
+
|
46
|
+
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
47
|
+
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
48
|
+
|
49
|
+
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
50
|
+
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
51
|
+
|
52
|
+
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
53
|
+
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
54
|
+
|
55
|
+
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
56
|
+
W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
57
|
+
|
58
|
+
W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
59
|
+
W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
60
|
+
|
61
|
+
W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
62
|
+
W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
63
|
+
|
64
|
+
W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
65
|
+
W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
66
|
+
|
67
|
+
W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
68
|
+
W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
69
|
+
|
70
|
+
W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
71
|
+
W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
72
|
+
|
73
|
+
W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
74
|
+
W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
75
|
+
|
76
|
+
W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
77
|
+
W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
78
|
+
|
79
|
+
W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|