vizi_tracker 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/README.rdoc +116 -0
  2. data/Rakefile +44 -0
  3. data/config/logger-backup.yml +49 -0
  4. data/config/logger.yml +69 -0
  5. data/config/logger_apache.yml +65 -0
  6. data/config/logger_sample.yml +69 -0
  7. data/data/exlog.log +5458 -0
  8. data/data/sample-alter.log +11870 -0
  9. data/data/sample-surf.log +47 -0
  10. data/data/sample-wle.log +30474 -0
  11. data/data/testlog.log +270 -0
  12. data/data/vizitrax.log +17951 -0
  13. data/doc/Object.html +200 -0
  14. data/doc/ParserTest.html +268 -0
  15. data/doc/README_rdoc.html +128 -0
  16. data/doc/Rakefile.html +148 -0
  17. data/doc/Visit.html +487 -0
  18. data/doc/VisitList.html +385 -0
  19. data/doc/Vizi/LogFormat.html +377 -0
  20. data/doc/Vizi/LogParser.html +551 -0
  21. data/doc/Vizi/Visit.html +487 -0
  22. data/doc/Vizi/VisitList.html +386 -0
  23. data/doc/Vizi.html +168 -0
  24. data/doc/ViziLogFormat.html +382 -0
  25. data/doc/ViziLogParser.html +551 -0
  26. data/doc/created.rid +8 -0
  27. data/doc/formats/apache-custom-log.pdf +0 -0
  28. data/doc/formats/apache.rtf +238 -0
  29. data/doc/formats/format-descriptions.xls +0 -0
  30. data/doc/formats/w3c-extended.pdf +0 -0
  31. data/doc/formats/w3c-extended.rtf +135 -0
  32. data/doc/images/brick.png +0 -0
  33. data/doc/images/brick_link.png +0 -0
  34. data/doc/images/bug.png +0 -0
  35. data/doc/images/bullet_black.png +0 -0
  36. data/doc/images/bullet_toggle_minus.png +0 -0
  37. data/doc/images/bullet_toggle_plus.png +0 -0
  38. data/doc/images/date.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_green.png +0 -0
  49. data/doc/images/wrench.png +0 -0
  50. data/doc/images/wrench_orange.png +0 -0
  51. data/doc/images/zoom.png +0 -0
  52. data/doc/index.html +112 -0
  53. data/doc/js/darkfish.js +116 -0
  54. data/doc/js/jquery.js +32 -0
  55. data/doc/js/quicksearch.js +114 -0
  56. data/doc/js/thickbox-compressed.js +10 -0
  57. data/doc/lib/vizi/parser_rb.html +63 -0
  58. data/doc/lib/vizi/vizi_tracker_rb.html +63 -0
  59. data/doc/lib/vizi_log_parser_rb.html +56 -0
  60. data/doc/lib/vizi_tracker_rb.html +56 -0
  61. data/doc/rdoc.css +759 -0
  62. data/doc/test/parser_test_rb.html +54 -0
  63. data/doc/test/test_helper_rb.html +56 -0
  64. data/doc/testit_rb.html +63 -0
  65. data/lib/vizi/vizi_tracker.rb +406 -0
  66. data/lib/vizi_tracker.rb +5 -0
  67. data/log/parse.log +79 -0
  68. data/log/system.log +66 -0
  69. data/test/parser_test.rb +48 -0
  70. data/test/test_helper.rb +3 -0
  71. data/testit.rb +105 -0
  72. data/vizi_tracker.gemspec +21 -0
  73. metadata +146 -0
@@ -0,0 +1,54 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
8
+
9
+ <title>File: parser_test.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="../js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="../js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="../js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>uri</li>
35
+
36
+ </ul>
37
+ </dd>
38
+
39
+
40
+
41
+ </dl>
42
+ </div>
43
+
44
+ <div id="documentation">
45
+
46
+ <div class="description">
47
+ <h2>Description</h2>
48
+
49
+ </div>
50
+
51
+ </div>
52
+ </body>
53
+ </html>
54
+
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
8
+
9
+ <title>File: test_helper.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="../js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="../js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="../js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>test/unit</li>
35
+
36
+ <li>ruby-debug</li>
37
+
38
+ </ul>
39
+ </dd>
40
+
41
+
42
+
43
+ </dl>
44
+ </div>
45
+
46
+ <div id="documentation">
47
+
48
+ <div class="description">
49
+ <h2>Description</h2>
50
+
51
+ </div>
52
+
53
+ </div>
54
+ </body>
55
+ </html>
56
+
@@ -0,0 +1,63 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
8
+
9
+ <title>File: testit.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="./js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="./js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="./js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="./js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-05-21 12:58:06 -0400</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>c:\rails\vizi_tracker\lib\vizi_tracker</li>
35
+
36
+ <li>time</li>
37
+
38
+ <li>yaml</li>
39
+
40
+ <li>logger</li>
41
+
42
+ </ul>
43
+ </dd>
44
+
45
+
46
+
47
+ </dl>
48
+ </div>
49
+
50
+ <div id="documentation">
51
+
52
+ <div class="description">
53
+ <h2>Description</h2>
54
+
55
+ <p>This is a sample application that uses the Vizitracker gem classes Modified
56
+ by ...</p>
57
+
58
+ </div>
59
+
60
+ </div>
61
+ </body>
62
+ </html>
63
+
@@ -0,0 +1,406 @@
1
+ # This gem module provides a set of classes to support the parsing of web log files and
2
+ # the creation of Visit records from the individual parsed web log records
3
+ #
4
+ # The LogFormat and LogParser classes were derived in part from an Apache logger application
5
+ # developed by Jan Wikholm. These two classes were extended to support both Apache and IIS
6
+ # web logs. The details from the web logs are assembled to compose Visit objects and Visit
7
+ # history detail.
8
+ #
9
+ # Author:: Al Kivi <al.kivi@vizitrax.com>
10
+ # License:: MIT
11
+
12
+ module Vizi
13
# Describes a single named log format.
#
# A format string written with Apache-style directives (for example
# '%h %g %u %t \"%r\" %>s %b') is compiled into:
# * format_symbols - the ordered field names, one per directive
# * format_regex   - a Regexp anchored at the start of the line with one
#                    capture group per directive
class LogFormat
  attr_reader :name, :format, :format_symbols, :format_regex

  DIRECTIVES = {
    # format string char => [:symbol to use, /regex to use when matching against log/]
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],        # apache and IIS: called c-ip in IIS
    'p' => [:sip, /\d+\.\d+\.\d+\.\d+/],       # IIS:
    'g' => [:auth, /\S*/],                     # apache:
    'u' => [:username, /\S*/],                 # apache and IIS: called cs-username in IIS
    't' => [:dtstring, /\[.*?\]/],             # apache: one field with date and time
    'd' => [:datestring, /\d+\-\d+\-\d+/],     # IIS:
    'e' => [:timestring, /\d+\:\d+\:\d+/],     # IIS:
    'r' => [:request, /.*?/],                  # apache: includes both csmethod and csuristem
    'm' => [:csmethod, /\w*?/],                # IIS:
    'w' => [:csuristem, /\S*/],                # IIS:
    's' => [:status, /\d+/],                   # apache and IIS: is called sc_status in IIS
    'b' => [:bytecount, /-|\d+/],              # apache and IIS: is called cs_bytes in IIS
    'v' => [:domain, /.*?/],                   # apache and IIS: is c-computername in IIS
    'i' => [:header_lines, /.*?/],             # apache: transforms to useragent or referer or cookies
    'a' => [:useragent, /\S*/],                # IIS:
    'j' => [:referer, /\S*/],                  # IIS:
    'k' => [:cscookie, /\d+/],                 # IIS:
    'q' => [:csuriquery, /.*/],                # IIS:
    'y' => [:csbytes, /\d+/],                  # IIS: (was /d+/, which matched the letter "d")
    'o' => [:sport, /\d+/],                    # IIS:
    'x' => [:scsubstatus, /\d+/],              # IIS:
    'z' => [:cshost, /\d+/],                   # IIS:
    'l' => [:win32status, /\d+/],              # IIS:
    'n' => [:timetaken, /\d+/],                # IIS:
    'c' => [:comment, /^#/],                   # IIS: comment line identifier
    'f' => [:fields, /^#Fields:/]              # IIS: field line identifier
  }

  # Stores the name and format string, then compiles the format.
  #
  # name::   identifier for the format (the callers in this file pass a Symbol)
  # format:: directive string such as '%h %g %u %t \"%r\" %>s %b'
  def initialize(name, format)
    @name = name
    @format = format
    parse_format(format)
  end

  # Compiles +format+ into @format_symbols and @format_regex.
  #
  # Each directive has the shape %<condition>{<subdirective>}<char><separator>,
  # where <separator> is any run of whitespace, backslashes and double quotes
  # that follows the directive character. Whitespace in the separator becomes
  # \s in the generated regex; the remaining separator characters are copied
  # into the regex unchanged.
  def parse_format(format)
    # Build the character class from the bare directive characters. Joining
    # the keys with '|' (or interpolating an Array) would add '|', '"' and
    # brackets to the class, because '|' is not alternation inside [...].
    directive_chars = Regexp.escape(DIRECTIVES.keys.join)
    format_directive = /%(.*?)(\{.*?\})?([#{directive_chars}])([\s\\"]*)/

    symbols = []
    fragments = []
    format.scan(format_directive) do |condition, subdirective, directive_char, ignored|
      symbol, match_regex = process_directive(directive_char, subdirective, condition)
      separator = ignored.to_s.gsub(/\s/, '\\s')
      symbols << symbol
      fragments << "(#{match_regex})#{separator}"
    end

    @format_symbols = symbols
    @format_regex = /^#{fragments.join}/
  end

  # Returns [field_symbol, regex_source] for one directive.
  #
  # For the header directive 'i' the field symbol is derived from the
  # sub-directive, e.g. "{User-agent}" becomes :user_agent. When 'i' is given
  # without a sub-directive the table default (:header_lines) is used instead
  # of failing on nil. +condition+ is accepted for interface compatibility and
  # is not used.
  def process_directive(directive_char, subdirective, condition)
    symbol, matcher = DIRECTIVES[directive_char]
    if directive_char == 'i' && subdirective
      symbol = subdirective[1...-1].downcase.tr('-', '_').to_sym
    end
    [symbol, matcher.source]
  end
end
80
+
81
# Parses individual web log lines into a hash of named fields and classifies
# each hit (visitor type, page flag, search phrase, page id).
#
# Apache lines are recognised by matching them against the regexes compiled
# from LOG_FORMATS. IIS (W3C extended) files announce their columns in a
# "#Fields:" line; parse_line turns that line into a temporary LogFormat and
# returns it under :p_logformat so the caller can pass it back in for the
# data lines that follow.
class LogParser
  require 'time'
  require 'logger'

  LOG_FORMATS = {
    :common => '%h %g %u %t \"%r\" %>s %b',
    :common_with_virtual => '%v %h %g %u %t \"%r\" %>s %b',
    :combined => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_virtual => '%v %h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_cookies => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"',
    :w3c_c => '%c', # format is comment ... still looking
    :w3c_f => '%f'  # format is IIS fields ... ready to parse
  }

  # Maps IIS field names (as listed on the "#Fields:" line) to directive characters.
  FIELDNAMES = {
    'c-ip' => 'h',
    's-ip' => 'p',
    'cs-username' => 'u',
    'sc-status' => 's',
    'cs-bytes' => 'y',
    'sc-bytes' => 'b',
    'date' => 'd',
    'time' => 'e',
    'cs-method' => 'm',
    'cs-uri-stem' => 'w',
    'cs-uri-query' => 'q',
    'cs(Referer)' => 'j',    # internal shortened to referer
    'cs(User-Agent)' => 'a', # internal shortened to useragent
    'cs(Cookie)' => 'k',     # internal shortened to cscookie
    's-port' => 'o',
    'cs-host' => 'z',
    'sc-substatus' => 'x',
    'sc-win32-status' => 'l',
    'time-taken' => 'n',
    's-computername' => 'v'
  }

  attr_reader :known_formats

  # Stores the configurable logger control items and opens the parse log.
  #
  # drop_ips::                IPs whose hits get visitor type "D"
  # spider_ips::              IPs whose hits get visitor type "S"
  # spider_names::            fragments matched against the user agent; a hit gives type "S"
  # page_urls::               fragments matched against the URI stem to flag a hit as a page
  # hide_urls::               fragments that clear the page flag again
  # homepage::                prefix the URI stem must start with when accept_only_homepage is set
  # accept_only_homepage::    when true, only the homepage prefix counts as a page
  # hostname::                host fragment looked for in the referer
  # drop_refers_by_hostname:: when true, a referer containing hostname gives type "D"
  # use_local_time::          when true, timestamps are converted with Time#getlocal
  # assigned_numbers::        entries of the form "text,number" used to look up page ids
  # match_page_numbers::      when true, page hits are given a :p_pageid from assigned_numbers
  #
  # The parse log is written to ./log/parse.log relative to the current
  # working directory, rotated weekly, at WARN level.
  def initialize(drop_ips, spider_ips, spider_names, page_urls, hide_urls, homepage, accept_only_homepage,
                 hostname, drop_refers_by_hostname, use_local_time, assigned_numbers, match_page_numbers)
    @drops = drop_ips
    @sips = spider_ips
    @snames = spider_names
    @page_urls = page_urls
    @hide_urls = hide_urls
    @homepage = homepage
    @accept_only_homepage = accept_only_homepage
    @hostname = hostname
    @drop_refers_by_hostname = drop_refers_by_hostname
    @use_local_time = use_local_time
    @assigned_numbers = assigned_numbers
    @match_page_numbers = match_page_numbers
    @log_format = []
    initialize_known_formats
    @parselog = Logger.new('./log/parse.log', 'weekly')
    @parselog.level = Logger::WARN
  end

  # Compiles every entry of LOG_FORMATS into a LogFormat, keyed by format name.
  def initialize_known_formats
    @known_formats = {}
    LOG_FORMATS.each do |name, format|
      @known_formats[name] = Vizi::LogFormat.new(name, format)
    end
  end

  # Returns the name of the first known format whose regex matches +line+, or
  # :unknown. Formats are tried from the longest regex source to the shortest
  # so that the most specific format wins.
  def check_format(line)
    by_complexity = @known_formats.sort_by { |_key, log_format| log_format.format_regex.source.size }.reverse
    by_complexity.each do |key, log_format|
      return key if line.match(log_format.format_regex)
    end
    :unknown
  end

  # Builds a directive format string from an IIS "#Fields:" line.
  #
  # The first token (the "#Fields:" marker) is skipped; every following field
  # name is translated through FIELDNAMES. The result is also kept in @format.
  #
  # Raises ArgumentError for a field name that is not in FIELDNAMES. Skipping
  # it is not an option: data lines are mapped to fields by position, so a
  # dropped column would shift every field after it.
  def build_format(line)
    fieldnames = line.split(' ')[1..-1] || []
    directives = fieldnames.map do |fieldname|
      directive = FIELDNAMES[fieldname]
      raise ArgumentError, "unsupported IIS field name: #{fieldname}" if directive.nil?
      "%#{directive} "
    end
    @format = directives.join
    @format
  end

  # Match a partial string in field against an external field array.
  #
  # Returns the position in +field+ of the first fragment of +fldarray+ that
  # occurs in it, or nil when nothing matches or either argument is nil.
  def match_partial(field, fldarray)
    return nil if field.nil?
    Array(fldarray).each do |fragment|
      hit = field.index(fragment)
      return hit if hit
    end
    nil
  end

  # Find an assigned number from matching string against an external field array.
  #
  # Each entry of +fldarray+ is "text,number". Returns the number (as Integer)
  # of the first entry that contains +field+, or 0 when no entry matches or
  # either argument is nil.
  def find_assigned_number(field, fldarray)
    return 0 if field.nil?
    Array(fldarray).each do |entry|
      return entry.split(',')[1].to_i if entry.index(field)
    end
    0
  end

  # Parses one log line into a hash.
  #
  # line::      the raw log line
  # logformat:: nil for Apache-style detection by regex, or the LogFormat built
  #             from a preceding IIS "#Fields:" line, in which case the line is
  #             split on whitespace and mapped to the format symbols by position
  #
  # Besides the format's own fields the hash carries:
  # :p_logformatname:: detected format name ("temp" when logformat is given, "iis" for a fields line)
  # :p_logformat::     the logformat passed in, a new LogFormat for a fields line,
  #                    or nil after an embedded comment line
  # :p_linetype::      "V" visitor line, "C" comment line, "F" fields line
  # :p_visitortype::   "H" human (default), "S" spider, "D" dropped, "-" not relevant
  # :datetime, :p_pageflag, :p_searchphrase, :p_pageid:: set for visitor lines
  #
  # Raises ArgumentError when logformat is nil and the line matches no known format.
  def parse_line(line, logformat)
    if logformat.nil?
      @format_name = check_format(line)         # look for matching formats, check each time
      log_format = @known_formats[@format_name] # nil when the format is :unknown
      if log_format.nil? or line !~ log_format.format_regex
        raise ArgumentError, "log line does not match any known format"
      end
      data = line.downcase.scan(log_format.format_regex).flatten
    else
      log_format = logformat
      @format_name = "temp"
      data = line.split(' ')
    end

    parsed_data = {}
    log_format.format_symbols.each_with_index do |symbol, i|
      parsed_data[symbol] = data[i]
    end

    parsed_data[:p_logformatname] = @format_name.to_s
    parsed_data[:p_logformat] = logformat
    parsed_data[:p_visitortype] = "H"                            # default visitor type (H)uman
    parsed_data[:p_linetype] = parsed_data[:ip].nil? ? "C" : "V" # no ip means a comment line

    if @format_name.to_s == "w3c_f"                              # IIS "#Fields:" line
      @format = build_format(line)
      parsed_data[:p_logformat] = Vizi::LogFormat.new(:temp, @format) # shuttle the log_format object
      parsed_data[:p_logformatname] = "iis"
      parsed_data[:p_linetype] = "F"                             # linetype to (F)ield list
      parsed_data[:p_visitortype] = "-"                          # visitor type not relevant
    elsif @format_name.to_s == "w3c_c"                           # IIS comment section
      parsed_data[:p_linetype] = "C"
      parsed_data[:p_visitortype] = "-"
    elsif parsed_data[:p_linetype] == "C"
      @parselog.warn line
      @parselog.warn "Found comment lines embedded in the log file ... resetting to nil"
      parsed_data[:p_logformat] = nil
    else
      add_timestamps(parsed_data)
      classify_hit(parsed_data)
    end
    parsed_data
  end

  private

  # Sets :datetime from the IIS date/time pair and/or the Apache bracketed timestamp.
  def add_timestamps(parsed_data)
    if parsed_data[:datestring]
      parsed_data[:datetime] = to_log_time("#{parsed_data[:datestring]} #{parsed_data[:timestring]}")
    end

    if parsed_data[:dtstring]
      # strip the surrounding brackets, then separate the date from the time
      parsed_data[:dtstring] = parsed_data[:dtstring][1...-1].sub(":", " ")
      parsed_data[:datetime] = to_log_time(parsed_data[:dtstring])
    end
  end

  # Parses +text+ and rebuilds it as a UTC Time from the parsed clock fields,
  # converted to local time when use_local_time is set.
  def to_log_time(text)
    dt = Time.parse(text)
    stamp = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
    @use_local_time ? stamp.getlocal : stamp
  end

  # Applies the logger yml rules: page flag, visitor type, search phrase and page id.
  def classify_hit(parsed_data)
    if parsed_data[:request]
      # apache request is "METHOD uri PROTOCOL"; the uri is the second token
      parsed_data[:csuristem] = parsed_data[:request].split(' ')[1]
    end
    uristem = parsed_data[:csuristem]

    if @accept_only_homepage
      is_page = uristem.to_s.downcase.index(@homepage) == 0
    else
      is_page = match_partial(uristem, @page_urls) ? true : false
    end
    is_page = false if @hide_urls and match_partial(uristem, @hide_urls)
    parsed_data[:p_pageflag] = is_page

    ip = parsed_data[:ip]
    parsed_data[:p_visitortype] = "D" if @drops and @drops.index(ip)
    parsed_data[:p_visitortype] = "S" if @sips and @sips.index(ip)

    if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent], @snames)
      parsed_data[:p_visitortype] = "S"
    end

    referer = parsed_data[:referer]
    if referer
      found = (/(search\?\S*?[pq])=(\S*?)(&)/).match(referer)
      parsed_data[:p_searchphrase] = found[2] if found
      if @drop_refers_by_hostname and @hostname and referer.index(@hostname)
        parsed_data[:p_visitortype] = "D"
      end
    end

    if @match_page_numbers and parsed_data[:p_pageflag]
      parsed_data[:p_pageid] = find_assigned_number(uristem, @assigned_numbers)
    end
  end
end
295
+
296
# This class creates and stores information related to each visit.
# Visits are determined on the basis of the IP address hits during a timed interval.
#
# NOTE(review): the class variables @@visit_timeout (added to the start time to
# get the expiry time) and @@download_page_number (the page id that triggers
# download detection) are read here but not assigned anywhere in this class.
# They must be defined by the application before a Visit is created.
class Visit
  attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pages, :robots, :visitortype, :searchphrase
  attr_reader :rank, :pageids, :download_file

  # This method calculates the rank.
  #
  # Up to 9 pages count 10 points each and up to 9 minutes of duration count
  # 1 point each; the rank is (points + 10) / 20 with a minimum of 1. The rank
  # is negated for visitor type "S" and forced to 0 for visitor type "D".
  def calculate_rank(pages, duration, visitortype)
    page_points = [pages, 9].min * 10
    minute_points = [duration / 60, 9].min
    rank = ((page_points + minute_points + 10) / 20).round
    rank = 1 if rank == 0
    rank = -rank if visitortype == "S"
    rank = 0 if visitortype == "D"
    rank
  end

  # This method extracts the name of a downloaded file from the csuriquery value.
  #
  # Only requests with a timetaken above 4000 are treated as downloads; the
  # name is whatever follows "file=" in the query. The name is also printed
  # with +p+. Returns nil when the request is too short, the query is nil, or
  # it has no "file=" part.
  def get_download(csuriquery, timetaken)
    download = nil
    if timetaken.to_i > 4000
      download = csuriquery.to_s.split("file=")[1]
      p download
    end
    download
  end

  # The method completes the initialization and update methods: it records the
  # search phrase, recalculates the rank and collects the page id of the hit.
  #
  # The page id is +p_pageid+ when given, otherwise the number following
  # "PageID=" in +csuriquery+ (if any). Page ids accumulate over the life of
  # the visit; the list is no longer reset on every hit.
  def add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
    @searchphrase = p_searchphrase if p_searchphrase
    @rank = calculate_rank(@pages, @duration, @visitortype)
    @pageids ||= []
    if p_pageid
      @pageids << p_pageid
    else
      found = (/(PageID)=(\d+)/).match(csuriquery.to_s)
      if found
        p_pageid = found[2].to_i
        @pageids << p_pageid
        @download_file = get_download(csuriquery, timetaken) if p_pageid == @@download_page_number
      end
    end
  end

  # This method initializes the Visit object. Load object with parsed data.
  #
  # The first hit sets start, end and expiry times; hits starts at 0 and pages
  # at 1 when the hit is flagged as a page. A request for "/robots.txt" marks
  # the visit as visitor type "S".
  def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @ip = ip
    @start_dt = log_dt
    @expire_dt = @start_dt + @@visit_timeout
    @end_dt = @start_dt
    @duration = 0
    @hits = 0
    @pages = p_pageflag ? 1 : 0
    @visitortype = csuristem == "/robots.txt" ? "S" : p_visitortype
    @searchphrase = ""
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # This method updates the Visit object with new parsed data.
  #
  # The visitor type is only replaced while the visit is still classed as
  # "H", so a visit that was marked "S" or "D" keeps that type.
  def update(end_dt, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @end_dt = end_dt
    @duration = (@end_dt - @start_dt).to_i
    @hits += 1
    @pages += 1 if p_pageflag
    @visitortype = p_visitortype if @visitortype == "H"
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # Prints a one-line summary of the visit with +p+, followed by the search
  # phrase and the page ids when present. Every visit is printed; the former
  # "rank > 0" filter is disabled.
  def sendoutput
    iplong = @ip.to_s+" "
    p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@visitortype+" Hits> "+@hits.to_s+" Pgs> "+@pages.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
    p " Phrase> "+@searchphrase if @searchphrase.length > 0
    p @pageids if @pageids.length > 0
  end
end
375
+
376
# Holds the visits that are currently in process (cached).
# The list itself only stores and looks up visits; find_expired lets the
# caller locate a visit whose expiry time has passed so it can be output and
# removed with delete.
class VisitList
  # Starts with an empty list.
  def initialize
    @visits = []
  end

  # Adds +visit+ to the end of the list and returns the list object, so calls can be chained.
  def append(visit)
    @visits << visit
    self
  end

  # Removes +visit+ from the list; returns the removed visit, or nil when it was not present.
  def delete(visit)
    @visits.delete(visit)
  end

  # Returns the underlying array of cached visits.
  def find_all
    @visits
  end

  # Returns the first cached visit whose ip equals +ip+, or nil.
  def find_by_ip(ip)
    @visits.detect { |candidate| ip == candidate.ip }
  end

  # Returns the first cached visit whose expire_dt lies before +test_dt+, or nil.
  def find_expired(test_dt)
    @visits.detect { |candidate| candidate.expire_dt < test_dt }
  end
end
405
+
406
+ end
@@ -0,0 +1,5 @@
1
# encoding: utf-8
# Entry point of the gem: loads lib/vizi/vizi_tracker.rb, which defines the Vizi module.
# The path is resolved relative to this file rather than the current working
# directory, so the gem also loads when the application is started elsewhere.
# module ViziTracker # :doc:
## require 'vizi_tracker'
load File.expand_path('../vizi/vizi_tracker.rb', __FILE__)
# end
data/log/parse.log ADDED
@@ -0,0 +1,79 @@
1
+ # Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
2
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
3
+
4
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
6
+
7
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
9
+
10
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
12
+
13
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
+ W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
15
+
16
+ W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- :
18
+
19
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
+
22
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
+
25
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
26
+ W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
27
+
28
+ W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
29
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
30
+
31
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
32
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
33
+
34
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
35
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
36
+
37
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
38
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
39
+
40
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
41
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
42
+
43
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
44
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
45
+
46
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
47
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
48
+
49
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
50
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
51
+
52
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
53
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
54
+
55
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
56
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
57
+
58
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
59
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
60
+
61
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
62
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
63
+
64
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
65
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
66
+
67
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
68
+ W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
69
+
70
+ W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
71
+ W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
72
+
73
+ W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
74
+ W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
75
+
76
+ W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
77
+ W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
78
+
79
+ W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil