vizi_tracker 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +116 -0
- data/Rakefile +44 -0
- data/config/logger-backup.yml +49 -0
- data/config/logger.yml +69 -0
- data/config/logger_apache.yml +65 -0
- data/config/logger_sample.yml +69 -0
- data/data/exlog.log +5458 -0
- data/data/sample-alter.log +11870 -0
- data/data/sample-surf.log +47 -0
- data/data/sample-wle.log +30474 -0
- data/data/testlog.log +270 -0
- data/data/vizitrax.log +17951 -0
- data/doc/Object.html +200 -0
- data/doc/ParserTest.html +268 -0
- data/doc/README_rdoc.html +128 -0
- data/doc/Rakefile.html +148 -0
- data/doc/Visit.html +487 -0
- data/doc/VisitList.html +385 -0
- data/doc/Vizi/LogFormat.html +377 -0
- data/doc/Vizi/LogParser.html +551 -0
- data/doc/Vizi/Visit.html +487 -0
- data/doc/Vizi/VisitList.html +386 -0
- data/doc/Vizi.html +168 -0
- data/doc/ViziLogFormat.html +382 -0
- data/doc/ViziLogParser.html +551 -0
- data/doc/created.rid +8 -0
- data/doc/formats/apache-custom-log.pdf +0 -0
- data/doc/formats/apache.rtf +238 -0
- data/doc/formats/format-descriptions.xls +0 -0
- data/doc/formats/w3c-extended.pdf +0 -0
- data/doc/formats/w3c-extended.rtf +135 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +112 -0
- data/doc/js/darkfish.js +116 -0
- data/doc/js/jquery.js +32 -0
- data/doc/js/quicksearch.js +114 -0
- data/doc/js/thickbox-compressed.js +10 -0
- data/doc/lib/vizi/parser_rb.html +63 -0
- data/doc/lib/vizi/vizi_tracker_rb.html +63 -0
- data/doc/lib/vizi_log_parser_rb.html +56 -0
- data/doc/lib/vizi_tracker_rb.html +56 -0
- data/doc/rdoc.css +759 -0
- data/doc/test/parser_test_rb.html +54 -0
- data/doc/test/test_helper_rb.html +56 -0
- data/doc/testit_rb.html +63 -0
- data/lib/vizi/vizi_tracker.rb +406 -0
- data/lib/vizi_tracker.rb +5 -0
- data/log/parse.log +79 -0
- data/log/system.log +66 -0
- data/test/parser_test.rb +48 -0
- data/test/test_helper.rb +3 -0
- data/testit.rb +105 -0
- data/vizi_tracker.gemspec +21 -0
- metadata +146 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: parser_test.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>uri</li>
|
35
|
+
|
36
|
+
</ul>
|
37
|
+
</dd>
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
</dl>
|
42
|
+
</div>
|
43
|
+
|
44
|
+
<div id="documentation">
|
45
|
+
|
46
|
+
<div class="description">
|
47
|
+
<h2>Description</h2>
|
48
|
+
|
49
|
+
</div>
|
50
|
+
|
51
|
+
</div>
|
52
|
+
</body>
|
53
|
+
</html>
|
54
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: test_helper.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>test/unit</li>
|
35
|
+
|
36
|
+
<li>ruby-debug</li>
|
37
|
+
|
38
|
+
</ul>
|
39
|
+
</dd>
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
</dl>
|
44
|
+
</div>
|
45
|
+
|
46
|
+
<div id="documentation">
|
47
|
+
|
48
|
+
<div class="description">
|
49
|
+
<h2>Description</h2>
|
50
|
+
|
51
|
+
</div>
|
52
|
+
|
53
|
+
</div>
|
54
|
+
</body>
|
55
|
+
</html>
|
56
|
+
|
data/doc/testit_rb.html
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: testit.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="./js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="./js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="./js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="./js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-05-21 12:58:06 -0400</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>c:\rails\vizi_tracker\lib\vizi_tracker</li>
|
35
|
+
|
36
|
+
<li>time</li>
|
37
|
+
|
38
|
+
<li>yaml</li>
|
39
|
+
|
40
|
+
<li>logger</li>
|
41
|
+
|
42
|
+
</ul>
|
43
|
+
</dd>
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
</dl>
|
48
|
+
</div>
|
49
|
+
|
50
|
+
<div id="documentation">
|
51
|
+
|
52
|
+
<div class="description">
|
53
|
+
<h2>Description</h2>
|
54
|
+
|
55
|
+
<p>This is a sample application that uses the Vizitracker gem classes. Modified
|
56
|
+
by ...</p>
|
57
|
+
|
58
|
+
</div>
|
59
|
+
|
60
|
+
</div>
|
61
|
+
</body>
|
62
|
+
</html>
|
63
|
+
|
@@ -0,0 +1,406 @@
|
|
1
|
+
# This gem module provides a set of classes to support the parsing of web log files and
|
2
|
+
# the creation of Visit records from the individual parsed web log records
|
3
|
+
#
|
4
|
+
# The LogFormat and LogParser classes were derived in part from an Apache logger application
|
5
|
+
# developed by Jan Wikholm. These two classes were extended to support both Apache and IIS
|
6
|
+
# web logs. The details from the web logs are assembled to compose Visit objects and Visit
|
7
|
+
# history detail.
|
8
|
+
#
|
9
|
+
# Author:: Al Kivi <al.kivi@vizitrax.com>
|
10
|
+
# License:: MIT
|
11
|
+
|
12
|
+
module Vizi
|
13
|
+
# This class describes a single log format. It compiles a format string made of
# "%x" directives (for example '%h %g %u %t \"%r\" %>s %b') into:
#
# * +format_symbols+ - the ordered list of field symbols found in the format
# * +format_regex+   - a regex, anchored at the start of the line, with one
#   capture group per directive
class LogFormat
  attr_reader :name, :format, :format_symbols, :format_regex

  DIRECTIVES = {
    # format string char => [:symbol to use, /regex to use when matching against log/]
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],          # apache and IIS: called c-ip in IIS
    'p' => [:sip, /\d+\.\d+\.\d+\.\d+/],         # IIS:
    'g' => [:auth, /\S*/],                       # apache:
    'u' => [:username, /\S*/],                   # apache and IIS: called cs-username in IIS
    't' => [:dtstring, /\[.*?\]/],               # apache: one field with date and time
    'd' => [:datestring, /\d+\-\d+\-\d+/],       # IIS:
    'e' => [:timestring, /\d+\:\d+\:\d+/],       # IIS:
    'r' => [:request, /.*?/],                    # apache: includes both csmethod and csuristem
    'm' => [:csmethod, /\w*?/],                  # IIS:
    'w' => [:csuristem, /\S*/],                  # IIS:
    's' => [:status, /\d+/],                     # apache and IIS: is called sc_status in IIS
    'b' => [:bytecount, /-|\d+/],                # apache and IIS: is called cs_bytes in IIS
    'v' => [:domain, /.*?/],                     # apache and IIS: is c-computername in IIS
    'i' => [:header_lines, /.*?/],               # apache: transforms to useragent or referer or cookies
    'a' => [:useragent, /\S*/],                  # IIS:
    'j' => [:referer, /\S*/],                    # IIS:
    # NOTE(review): digits-only looks too narrow for a cookie value - confirm
    'k' => [:cscookie, /\d+/],                   # IIS:
    'q' => [:csuriquery, /.*/],                  # IIS:
    'y' => [:csbytes, /\d+/],                    # IIS: (was /d+/, which matched the letter "d")
    'o' => [:sport, /\d+/],                      # IIS:
    'x' => [:scsubstatus, /\d+/],                # IIS:
    # NOTE(review): digits-only looks too narrow for a host name - confirm
    'z' => [:cshost, /\d+/],                     # IIS:
    'l' => [:win32status, /\d+/],                # IIS:
    'n' => [:timetaken, /\d+/],                  # IIS:
    'c' => [:comment, /^#/],                     # IIS: comment line identifier
    'f' => [:fields, /^#Fields:/]                # IIS: field line identifier
  }

  # This method initializes the LogFormat object with a name and a format
  # string, and immediately compiles the format (see parse_format).
  def initialize(name, format)
    @name, @format = name, format
    parse_format(format)
  end

  # Scans +format+ for directives and stores the results in @format_symbols
  # and @format_regex.
  #
  # The symbols are used to map the log to the env variables.
  # The regex is used when checking what format the log is and to extract data.
  def parse_format(format)
    # Build the character class from the directive letters only. Interpolating
    # an Array (as the previous code did) also admitted '|' and, on Ruby 1.9+,
    # the characters of Array#inspect ('"', '[', ']') as directive letters.
    directive_chars = Regexp.escape(DIRECTIVES.keys.join)
    format_directive = /%(.*?)(\{.*?\})?([#{directive_chars}])([\s\\"]*)/
    log_format_symbols = []
    format_regex = ""
    format.scan(format_directive) do |condition, subdirective, directive_char, ignored|
      log_format, match_regex = process_directive(directive_char, subdirective, condition)
      # Whitespace between directives is matched as \s; quotes/backslashes are kept as-is.
      separator = ignored.to_s.gsub(/\s/, '\\s')
      log_format_symbols << log_format
      format_regex << "(#{match_regex})#{separator}"
    end
    @format_symbols = log_format_symbols
    @format_regex = /^#{format_regex}/
  end

  # Returns [symbol, regex_source] for one directive.
  #
  # For the header directive 'i' the symbol is derived from the sub-directive,
  # e.g. "{User-agent}" becomes :user_agent. When 'i' is used without a
  # sub-directive the generic :header_lines symbol is returned instead of
  # failing on nil. +condition+ is accepted for interface compatibility and
  # is not used.
  def process_directive(directive_char, subdirective, condition)
    symbol, pattern = DIRECTIVES[directive_char]
    if directive_char == 'i' && subdirective
      symbol = subdirective[1...-1].downcase.tr('-', '_').to_sym
    end
    [symbol, pattern.source]
  end
end
|
80
|
+
|
81
|
+
# This class handles the parsing of each line in the log file. A line is either
# matched against the known Apache formats (LOG_FORMATS) or, for IIS/W3C logs,
# split on whitespace according to a LogFormat built from the "#Fields:" header.
# The result of parse_line is a Hash of field symbols plus derived :p_* entries.
class LogParser
  require 'time'
  require 'logger' # Logger is used by initialize

  LOG_FORMATS = {
    :common => '%h %g %u %t \"%r\" %>s %b',
    :common_with_virtual => '%v %h %g %u %t \"%r\" %>s %b',
    :combined => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_virtual => '%v %h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_cookies => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"',
    :w3c_c => '%c', # format is comment ... still looking
    :w3c_f => '%f'  # format is IIS fields ... ready to parse
  }

  # Maps IIS/W3C field names to the LogFormat directive letters.
  FIELDNAMES = {
    'c-ip' => 'h',
    's-ip' => 'p',
    'cs-username' => 'u',
    'sc-status' => 's',
    'cs-bytes' => 'y',
    'sc-bytes' => 'b',
    'date' => 'd',
    'time' => 'e',
    'cs-method' => 'm',
    'cs-uri-stem' => 'w',
    'cs-uri-query' => 'q',
    'cs(Referer)' => 'j',    # internal shortened to referer
    'cs(User-Agent)' => 'a', # internal shortened to useragent
    'cs(Cookie)' => 'k',     # internal shortened to cscookie
    's-port' => 'o',
    'cs-host' => 'z',
    'sc-substatus' => 'x',
    'sc-win32-status' => 'l',
    'time-taken' => 'n',
    's-computername' => 'v'
  }

  attr_reader :known_formats

  #@@log = ActiveRecord::Base.logger

  # This method initialises the LogParser object and loads the configurable logger control items.
  # It also compiles the known formats and opens ./log/parse.log (weekly rotation,
  # WARN level) relative to the current working directory.
  def initialize(drop_ips, spider_ips, spider_names, page_urls, hide_urls, homepage, accept_only_homepage,
                 hostname, drop_refers_by_hostname, use_local_time, assigned_numbers, match_page_numbers)
    @drops = drop_ips
    @sips = spider_ips
    @snames = spider_names
    @page_urls = page_urls
    @hide_urls = hide_urls
    @homepage = homepage
    @accept_only_homepage = accept_only_homepage
    @hostname = hostname
    @drop_refers_by_hostname = drop_refers_by_hostname
    @use_local_time = use_local_time
    @assigned_numbers = assigned_numbers
    @match_page_numbers = match_page_numbers
    @log_format = []
    initialize_known_formats
    @parselog = Logger.new('./log/parse.log', 'weekly')
    @parselog.level = Logger::WARN
  end

  # Processes the format strings into symbols and test regexes and saves them using the LogFormat class
  def initialize_known_formats
    @known_formats = {}
    LOG_FORMATS.each do |name, format|
      @known_formats[name] = Vizi::LogFormat.new(name, format)
    end
  end

  # Checks which standard the log file (well one line) is.
  # Automatically checks for most complex (longest) regex first ...
  # Returns the format name, or :unknown when nothing matches.
  def check_format(line)
    longest_first = @known_formats.sort_by { |_key, log_format| log_format.format_regex.source.size }.reverse
    longest_first.each do |key, log_format|
      return key if line.match(log_format.format_regex)
    end
    :unknown
  end

  # Builds the format string from the IIS fieldnames.
  # The first token of +line+ (the "#Fields:" marker) is skipped; every other
  # token is translated through FIELDNAMES into "%x ". The result is stored in
  # @format and returned.
  #
  # Raises ArgumentError for a field name that is not in FIELDNAMES (previously
  # this surfaced as a TypeError from String#+).
  def build_format(line)
    fieldnames = line.split(' ')[1..-1] || []
    @format = fieldnames.map do |fieldname|
      directive = FIELDNAMES[fieldname]
      raise ArgumentError, "Unknown IIS field name: #{fieldname}" if directive.nil?
      "%#{directive} "
    end.join
    @format
  end

  # Match a partial string in field against an external field array.
  # Returns the index within +field+ of the first array entry that occurs in
  # it, or nil when there is no hit (or when either argument is nil).
  def match_partial(field, fldarray)
    return nil if field.nil? || fldarray.nil?
    fldarray.each do |fragment|
      hit = field.index(fragment)
      return hit if hit
    end
    nil
  end

  # Find an assigned number from matching string against an external field array.
  # Each array entry is expected to look like "text,number"; the number of the
  # first entry containing +field+ is returned, or 0 when nothing matches (or
  # when either argument is nil).
  def find_assigned_number(field, fldarray)
    return 0 if field.nil? || fldarray.nil?
    entry = fldarray.find { |candidate| candidate.index(field) }
    entry ? entry.split(',')[1].to_i : 0
  end

  # Parses one log line into a Hash.
  #
  # apache files ... regex the file to determine logformat name
  # IIS files ... parse the fields string to determine the file contents
  #
  # +logformat+ is nil for auto-detection against the known formats, or a
  # LogFormat-like object (responding to format_symbols) whose symbols are
  # assigned positionally to the whitespace-separated tokens of +line+.
  #
  # Raises ArgumentError when no known format matches the line.
  def parse_line(line, logformat)
    if logformat != nil
      log_format = logformat # get log_format string
      @format_name = "temp"
      data = line.split(' ')
    else
      @format_name = check_format(line) # look for matching formats, check each time
      log_format = @known_formats[@format_name] # found a matched format
      if log_format.nil? or line !~ log_format.format_regex
        raise ArgumentError, "Unrecognized log line format"
      end
      data = line.downcase.scan(log_format.format_regex).flatten
    end

    parsed_data = {}
    log_format.format_symbols.each_with_index do |symbol, i|
      parsed_data[symbol] = data[i] # load data for each format_symbol
    end

    parsed_data[:p_logformatname] = @format_name.to_s
    parsed_data[:p_logformat] = logformat
    parsed_data[:p_visitortype] = "H" # set default visitor type (H)uman
    parsed_data[:p_linetype] = "V" # linetype is (V)isitors
    parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # reset if a comment line

    if @format_name.to_s == "w3c_f" # IIS file name ... generic
      @format = build_format(line) # parse fields to get log_format
      temp_format = Vizi::LogFormat.new(:temp, @format) # create temp format
      parsed_data[:p_logformat] = temp_format # shuttle the log_format object
      parsed_data[:p_logformatname] = "iis" # change the name to iis
      parsed_data[:p_linetype] = "F" # linetype to (F)ield list
      parsed_data[:p_visitortype] = "-" # visitor type not relevant
    elsif @format_name.to_s == "w3c_c" # found IIS file in comments section
      parsed_data[:p_linetype] = "C" # linetype is (C)omment
      parsed_data[:p_visitortype] = "-"
    elsif parsed_data[:p_linetype] == "C"
      @parselog.warn line
      @parselog.warn "Found comment lines embedded in the log file ... resetting to nil"
      parsed_data[:p_logformat] = nil
    else # a visitor line: derive timestamps and classification

      # NOTE(review): both branches below rebuild the time with Time.gm from the
      # parsed wall-clock fields, so any zone offset in the log text is not
      # applied - confirm this is intended.
      if parsed_data[:datestring]
        dt = Time.parse(parsed_data[:datestring] + " " + parsed_data[:timestring].to_s)
        parsed_data[:datetime] = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
        parsed_data[:datetime] = parsed_data[:datetime].getlocal if @use_local_time
      end

      if parsed_data[:dtstring]
        parsed_data[:dtstring] = parsed_data[:dtstring][1...-1] # strip the [ ]
        parsed_data[:dtstring] = parsed_data[:dtstring].sub(":", " ") # date:time -> date time
        dt = Time.parse(parsed_data[:dtstring])
        parsed_data[:datetime] = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
        parsed_data[:datetime] = parsed_data[:datetime].getlocal if @use_local_time
      end

      if parsed_data[:request]
        # request looks like "method uri protocol"; keep the uri part
        splitrequest = parsed_data[:request].split(' ')
        parsed_data[:csuristem] = splitrequest[1]
      end

      # Now classify visitortype based on logger yml rules ...

      # A missing URI stem is treated as an empty string so it simply matches nothing.
      uristem = parsed_data[:csuristem].to_s

      parsed_data[:p_pageflag] = false
      if @accept_only_homepage
        parsed_data[:p_pageflag] = true if @homepage and uristem.downcase.index(@homepage) == 0
      else
        parsed_data[:p_pageflag] = true if match_partial(uristem, @page_urls)
      end
      parsed_data[:p_pageflag] = false if @hide_urls and match_partial(uristem, @hide_urls)

      parsed_data[:p_visitortype] = "D" if @drops and @drops.index(parsed_data[:ip])
      parsed_data[:p_visitortype] = "S" if @sips and @sips.index(parsed_data[:ip])

      # IIS formats store the agent under :useragent; the Apache "%{User-agent}i"
      # directive yields :user_agent, so both keys are consulted.
      useragent = parsed_data[:useragent] || parsed_data[:user_agent]
      if useragent and @snames and match_partial(useragent, @snames)
        parsed_data[:p_visitortype] = "S"
      end

      if parsed_data[:referer]
        # The phrase ends at the next '&' or at the end of the referer.
        y = /(search\?\S*?[pq])=(\S*?)(&|$)/.match(parsed_data[:referer])
        parsed_data[:p_searchphrase] = y[2] if y != nil
        if @drop_refers_by_hostname and @hostname
          parsed_data[:p_visitortype] = "D" if parsed_data[:referer].index(@hostname) != nil
        end
      end

      if @match_page_numbers and parsed_data[:p_pageflag]
        parsed_data[:p_pageid] = find_assigned_number(parsed_data[:csuristem], @assigned_numbers)
      end

    end
    parsed_data
  end
end
|
295
|
+
|
296
|
+
# This class creates and stores information related to each visit.
# Visits are determined on the basis of the IP Address hits during a timed interval.
#
# The class reads two class variables that are not defined here and must be
# provided before use: @@visit_timeout (added to the start time to get the
# expiry time) and @@download_page_number (the page id compared against when
# recording a download).
class Visit
  attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pages, :robots, :visitortype, :searchphrase
  attr_reader :rank, :pageids, :download_file

  # This method calculates the rank from the page count and the duration in
  # seconds. Pages (capped at 9) weigh ten times as much as minutes (capped
  # at 9); the total is scaled with integer arithmetic and is never 0 for a
  # regular visitor. Spiders ("S") get the negated rank and dropped visitors
  # ("D") get 0.
  def calculate_rank(pages, duration, visitortype)
    ranktotal = [pages, 9].min * 10 + [duration / 60, 9].min
    rank = ((ranktotal + 10) / 20).round
    rank = 1 if rank == 0
    rank = -rank if visitortype == "S"
    rank = 0 if visitortype == "D"
    rank
  end

  # This method extracts the name of a downloaded file from the csuriquery value.
  # Returns the text following "file=" when +timetaken+ exceeds 4000, otherwise
  # nil. A nil +csuriquery+ yields nil instead of raising.
  def get_download(csuriquery, timetaken)
    return nil unless timetaken.to_i > 4000
    csuriquery.to_s.split("file=")[1]
  end

  # The method completes the initialization and update methods: it stores the
  # search phrase (when given), recomputes the rank and records the page id.
  #
  # Page ids accumulate over the life of the visit; the list is created once
  # rather than being reset on every call. When no +p_pageid+ is supplied the
  # id is taken from a "PageID=<n>" parameter in +csuriquery+, and a hit on the
  # download page also records the downloaded file name.
  def add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
    @searchphrase = p_searchphrase if p_searchphrase
    @rank = calculate_rank(@pages, @duration, @visitortype)
    @pageids ||= []
    if p_pageid
      @pageids << p_pageid
    else
      z = /(PageID)=(\d+)/.match(csuriquery.to_s)
      if z
        p_pageid = z[2].to_i
        @pageids << p_pageid
        @download_file = get_download(csuriquery, timetaken) if p_pageid == @@download_page_number
      end
    end
  end

  # This method initializes the Visit object. Load object with parsed data.
  # A first request for "/robots.txt" marks the visitor as a spider ("S").
  def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @ip = ip
    @start_dt = log_dt
    @expire_dt = @start_dt + @@visit_timeout
    @end_dt = @start_dt
    @duration = 0
    @hits = 0
    @pages = 0
    @pages = 1 if p_pageflag
    @visitortype = p_visitortype
    @visitortype = "S" if csuristem == "/robots.txt"
    @searchphrase = ""
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # This method updates the Visit object with new parsed data.
  # The visitor type is only overwritten while it is still (H)uman.
  def update(end_dt, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @end_dt = end_dt
    @duration = (@end_dt - @start_dt).to_i
    @hits = @hits + 1
    @pages = @pages + 1 if p_pageflag
    @visitortype = p_visitortype if @visitortype == "H"
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # Prints a one-line summary of the visit to standard output, followed by the
  # search phrase and the page ids when present.
  def sendoutput
    #if @rank > 0
    iplong = @ip.to_s+" "
    p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@visitortype+" Hits> "+@hits.to_s+" Pgs> "+@pages.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
    p" Phrase> "+@searchphrase if @searchphrase.length > 0
    p @pageids if @pageids.length > 0
    #end
  end
end
|
375
|
+
|
376
|
+
# This class keeps the list of visits that are currently in process (cached).
# It offers lookups by IP address and by expiry time so that a caller can
# find a visit that has passed its time interval and remove it from the list.
class VisitList
  # Starts with an empty list.
  def initialize
    @visits = []
  end

  # Adds +visit+ to the end of the list; returns the list object itself.
  def append(visit)
    @visits << visit
    self
  end

  # Removes every entry equal to +visit+ (Array#delete semantics).
  def delete(visit)
    @visits.delete(visit)
  end

  # Returns the underlying array of visits.
  def find_all
    @visits
  end

  # Returns the first visit whose ip equals +ip+, or nil.
  def find_by_ip(ip)
    @visits.detect { |candidate| ip == candidate.ip }
  end

  # Returns the first visit whose expire_dt is earlier than +test_dt+, or nil.
  def find_expired(test_dt)
    @visits.detect { |candidate| candidate.expire_dt < test_dt }
  end
end
|
405
|
+
|
406
|
+
end
|
data/lib/vizi_tracker.rb
ADDED
data/log/parse.log
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
|
2
|
+
W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
3
|
+
|
4
|
+
W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
5
|
+
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
6
|
+
|
7
|
+
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
8
|
+
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
9
|
+
|
10
|
+
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
11
|
+
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
12
|
+
|
13
|
+
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
14
|
+
W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
|
15
|
+
|
16
|
+
W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
17
|
+
W, [2011-06-05T22:08:53.528629 #560] WARN -- :
|
18
|
+
|
19
|
+
W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
20
|
+
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
21
|
+
|
22
|
+
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
23
|
+
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
24
|
+
|
25
|
+
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
26
|
+
W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
27
|
+
|
28
|
+
W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
29
|
+
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
30
|
+
|
31
|
+
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
32
|
+
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
33
|
+
|
34
|
+
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
35
|
+
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
36
|
+
|
37
|
+
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
38
|
+
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
39
|
+
|
40
|
+
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
41
|
+
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
42
|
+
|
43
|
+
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
44
|
+
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
45
|
+
|
46
|
+
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
47
|
+
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
48
|
+
|
49
|
+
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
50
|
+
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
51
|
+
|
52
|
+
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
53
|
+
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
54
|
+
|
55
|
+
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
56
|
+
W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
57
|
+
|
58
|
+
W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
59
|
+
W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
60
|
+
|
61
|
+
W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
62
|
+
W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
63
|
+
|
64
|
+
W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
65
|
+
W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
66
|
+
|
67
|
+
W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
68
|
+
W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
69
|
+
|
70
|
+
W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
71
|
+
W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
72
|
+
|
73
|
+
W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
74
|
+
W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
75
|
+
|
76
|
+
W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
77
|
+
W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
78
|
+
|
79
|
+
W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|