vizi_tracker 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/README.rdoc +116 -0
  2. data/Rakefile +44 -0
  3. data/config/logger-backup.yml +49 -0
  4. data/config/logger.yml +69 -0
  5. data/config/logger_apache.yml +65 -0
  6. data/config/logger_sample.yml +69 -0
  7. data/data/exlog.log +5458 -0
  8. data/data/sample-alter.log +11870 -0
  9. data/data/sample-surf.log +47 -0
  10. data/data/sample-wle.log +30474 -0
  11. data/data/testlog.log +270 -0
  12. data/data/vizitrax.log +17951 -0
  13. data/doc/Object.html +200 -0
  14. data/doc/ParserTest.html +268 -0
  15. data/doc/README_rdoc.html +128 -0
  16. data/doc/Rakefile.html +148 -0
  17. data/doc/Visit.html +487 -0
  18. data/doc/VisitList.html +385 -0
  19. data/doc/Vizi/LogFormat.html +377 -0
  20. data/doc/Vizi/LogParser.html +551 -0
  21. data/doc/Vizi/Visit.html +487 -0
  22. data/doc/Vizi/VisitList.html +386 -0
  23. data/doc/Vizi.html +168 -0
  24. data/doc/ViziLogFormat.html +382 -0
  25. data/doc/ViziLogParser.html +551 -0
  26. data/doc/created.rid +8 -0
  27. data/doc/formats/apache-custom-log.pdf +0 -0
  28. data/doc/formats/apache.rtf +238 -0
  29. data/doc/formats/format-descriptions.xls +0 -0
  30. data/doc/formats/w3c-extended.pdf +0 -0
  31. data/doc/formats/w3c-extended.rtf +135 -0
  32. data/doc/images/brick.png +0 -0
  33. data/doc/images/brick_link.png +0 -0
  34. data/doc/images/bug.png +0 -0
  35. data/doc/images/bullet_black.png +0 -0
  36. data/doc/images/bullet_toggle_minus.png +0 -0
  37. data/doc/images/bullet_toggle_plus.png +0 -0
  38. data/doc/images/date.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_green.png +0 -0
  49. data/doc/images/wrench.png +0 -0
  50. data/doc/images/wrench_orange.png +0 -0
  51. data/doc/images/zoom.png +0 -0
  52. data/doc/index.html +112 -0
  53. data/doc/js/darkfish.js +116 -0
  54. data/doc/js/jquery.js +32 -0
  55. data/doc/js/quicksearch.js +114 -0
  56. data/doc/js/thickbox-compressed.js +10 -0
  57. data/doc/lib/vizi/parser_rb.html +63 -0
  58. data/doc/lib/vizi/vizi_tracker_rb.html +63 -0
  59. data/doc/lib/vizi_log_parser_rb.html +56 -0
  60. data/doc/lib/vizi_tracker_rb.html +56 -0
  61. data/doc/rdoc.css +759 -0
  62. data/doc/test/parser_test_rb.html +54 -0
  63. data/doc/test/test_helper_rb.html +56 -0
  64. data/doc/testit_rb.html +63 -0
  65. data/lib/vizi/vizi_tracker.rb +406 -0
  66. data/lib/vizi_tracker.rb +5 -0
  67. data/log/parse.log +79 -0
  68. data/log/system.log +66 -0
  69. data/test/parser_test.rb +48 -0
  70. data/test/test_helper.rb +3 -0
  71. data/testit.rb +105 -0
  72. data/vizi_tracker.gemspec +21 -0
  73. metadata +146 -0
@@ -0,0 +1,54 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=IBM437" http-equiv="Content-Type" />
8
+
9
+ <title>File: parser_test.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="../js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="../js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="../js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>uri</li>
35
+
36
+ </ul>
37
+ </dd>
38
+
39
+
40
+
41
+ </dl>
42
+ </div>
43
+
44
+ <div id="documentation">
45
+
46
+ <div class="description">
47
+ <h2>Description</h2>
48
+
49
+ </div>
50
+
51
+ </div>
52
+ </body>
53
+ </html>
54
+
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=IBM437" http-equiv="Content-Type" />
8
+
9
+ <title>File: test_helper.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="../js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="../js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="../js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-05-11 20:32:49 -0400</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>test/unit</li>
35
+
36
+ <li>ruby-debug</li>
37
+
38
+ </ul>
39
+ </dd>
40
+
41
+
42
+
43
+ </dl>
44
+ </div>
45
+
46
+ <div id="documentation">
47
+
48
+ <div class="description">
49
+ <h2>Description</h2>
50
+
51
+ </div>
52
+
53
+ </div>
54
+ </body>
55
+ </html>
56
+
@@ -0,0 +1,63 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=IBM437" http-equiv="Content-Type" />
8
+
9
+ <title>File: testit.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="./js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="./js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="./js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="./js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-05-21 12:58:06 -0400</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>c:\rails\vizi_tracker\lib\vizi_tracker</li>
35
+
36
+ <li>time</li>
37
+
38
+ <li>yaml</li>
39
+
40
+ <li>logger</li>
41
+
42
+ </ul>
43
+ </dd>
44
+
45
+
46
+
47
+ </dl>
48
+ </div>
49
+
50
+ <div id="documentation">
51
+
52
+ <div class="description">
53
+ <h2>Description</h2>
54
+
55
+ <p>This is a sample application that uses the Vizitracker gem classes Modified
56
+ by ...</p>
57
+
58
+ </div>
59
+
60
+ </div>
61
+ </body>
62
+ </html>
63
+
@@ -0,0 +1,406 @@
1
+ # This gem module provides a set of classes to support the parsing of web log files and
2
+ # the creation of Visit records from the individual parsed web log records
3
+ #
4
+ # The LogFormat and LogParser classes were derived in part from an Apache logger application
5
+ # developed by Jan Wikholm. These two classes were extended to support both Apache and IIS
6
+ # web logs. The details from the web logs are assembled to compose Visit objects and Visit
7
+ # history detail.
8
+ #
9
+ # Author:: Al Kivi <al.kivi@vizitrax.com>
10
+ # License:: MIT
11
+
12
+ module Vizi
13
+ # This class describes one log format: it turns a format string into field symbols and a matching regex
14
class LogFormat
  # Describes a single log format.
  #
  # A format string such as '%h %g %u %t \"%r\" %>s %b' is scanned for
  # %-directives. Each directive contributes one symbol to +format_symbols+
  # and one capture group to +format_regex+, in the order they appear.
  attr_reader :name, :format, :format_symbols, :format_regex

  DIRECTIVES = {
    # format string char => [:symbol to use, /regex to use when matching against log/]
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],   # apache and IIS: called c-ip in IIS
    'p' => [:sip, /\d+\.\d+\.\d+\.\d+/],  # IIS:
    'g' => [:auth, /\S*/],                # apache:
    'u' => [:username, /\S*/],            # apache and IIS: called cs-username in IIS
    't' => [:dtstring, /\[.*?\]/],        # apache: one field with date and time
    'd' => [:datestring, /\d+\-\d+\-\d+/], # IIS:
    'e' => [:timestring, /\d+\:\d+\:\d+/], # IIS:
    'r' => [:request, /.*?/],             # apache: includes both csmethod and csuristem
    'm' => [:csmethod, /\w*?/],           # IIS:
    'w' => [:csuristem, /\S*/],           # IIS:
    's' => [:status, /\d+/],              # apache and IIS: is called sc_status in IIS
    'b' => [:bytecount, /-|\d+/],         # apache and IIS: is called cs_bytes in IIS
    'v' => [:domain, /.*?/],              # apache and IIS: is c-computername in IIS
    'i' => [:header_lines, /.*?/],        # apache: transforms to useragent or referer or cookies
    'a' => [:useragent, /\S*/],           # IIS:
    'j' => [:referer, /\S*/],             # IIS:
    'k' => [:cscookie, /\d+/],            # IIS:
    'q' => [:csuriquery, /.*/],           # IIS:
    'y' => [:csbytes, /\d+/],             # IIS: (was /d+/, which matched the letter "d")
    'o' => [:sport, /\d+/],               # IIS:
    'x' => [:scsubstatus, /\d+/],         # IIS:
    'z' => [:cshost, /\d+/],              # IIS:
    'l' => [:win32status, /\d+/],         # IIS:
    'n' => [:timetaken, /\d+/],           # IIS:
    'c' => [:comment, /^#/],              # IIS: comment line identifier
    'f' => [:fields, /^#Fields:/]         # IIS: field line identifier
  }

  # Stores the format name and format string, then derives the symbols and regex.
  #
  # name   - identifier for the format (a symbol such as :common)
  # format - format string made of %-directives
  def initialize(name, format)
    @name, @format = name, format
    parse_format(format)
  end

  # Scans +format+ for directives and fills @format_symbols and @format_regex.
  #
  # The symbols are used to map the log to the env variables.
  # The regex is used when checking what format the log is and to extract data.
  def parse_format(format)
    # Build the character class from the bare directive letters. Interpolating
    # an Array here would also inject brackets, quotes and pipes into the class.
    directive_chars = Regexp.escape(DIRECTIVES.keys.join)
    format_directive = /%(.*?)(\{.*?\})?([#{directive_chars}])([\s\\"]*)/
    log_format_symbols = []
    format_regex = String.new
    format.scan(format_directive) do |condition, subdirective, directive_char, ignored|
      log_format, match_regex = process_directive(directive_char, subdirective, condition)
      # Whitespace between directives becomes \s in the generated regex.
      ignored = ignored.gsub(/\s/, '\\s') unless ignored.nil?
      log_format_symbols << log_format
      format_regex << "(#{match_regex})#{ignored}"
    end
    @format_symbols = log_format_symbols
    @format_regex = /^#{format_regex}/
  end

  # Returns [symbol, regex_source] for one directive.
  #
  # For the 'i' directive the symbol is taken from the {Sub-Directive} text
  # (lower-cased, '-' turned into '_'); when no sub-directive is present the
  # table's default symbol is used instead of raising on nil.
  # +condition+ is accepted for interface compatibility and is not used.
  def process_directive(directive_char, subdirective, condition)
    symbol, pattern = DIRECTIVES[directive_char]
    if directive_char == 'i' && subdirective
      symbol = subdirective[1...-1].downcase.tr('-', '_').to_sym
    end
    [symbol, pattern.source]
  end
end
80
+
81
+ # This class handles the parsing of each line in the log file
82
+ class LogParser
83
+ require 'time'
84
+
85
+ LOG_FORMATS = {
86
+ :common => '%h %g %u %t \"%r\" %>s %b',
87
+ :common_with_virtual => '%v %h %g %u %t \"%r\" %>s %b',
88
+ :combined => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
89
+ :combined_with_virtual => '%v %h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
90
+ :combined_with_cookies => '%h %g %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"',
91
+ :w3c_c => '%c', # format is comment ... still looking
92
+ :w3c_f => '%f' # format is IIS fields ... ready to parse
93
+ }
94
+
95
+ FIELDNAMES = {
96
+ 'c-ip' => 'h',
97
+ 's-ip' => 'p',
98
+ 'cs-username' => 'u',
99
+ 'sc-status' => 's',
100
+ 'cs-bytes' => 'y',
101
+ 'sc-bytes' => 'b',
102
+ 'date' => 'd',
103
+ 'time' => 'e',
104
+ 'cs-method' => 'm',
105
+ 'cs-uri-stem' => 'w',
106
+ 'cs-uri-query' => 'q',
107
+ 'cs(Referer)' => 'j', # internal shortened to referer
108
+ 'cs(User-Agent)' => 'a', # internal shortened to useragent
109
+ 'cs(Cookie)' => 'k', # internal shortened to cscookie
110
+ 's-port' => 'o',
111
+ 'cs-host' => 'z',
112
+ 'sc-substatus' => 'x',
113
+ 'sc-win32-status' => 'l',
114
+ 'time-taken' => 'n',
115
+ 's-computername' => 'v'
116
+ }
117
+
118
+ attr_reader :known_formats
119
+
120
+ #@@log = ActiveRecord::Base.logger
121
+
122
+ # This method initialises LogParser object and loads the configurable logger control items
123
def initialize(drop_ips, spider_ips, spider_names, page_urls, hide_urls, homepage, accept_only_homepage,
               hostname, drop_refers_by_hostname, use_local_time, assigned_numbers, match_page_numbers)
  # Stores the configurable logger control items and prepares the known
  # log formats and the parse logger.
  #
  # Logger is required here because this file otherwise only requires 'time'.
  require 'logger'

  @drops = drop_ips
  @sips = spider_ips
  @snames = spider_names
  @page_urls = page_urls
  @hide_urls = hide_urls
  @homepage = homepage
  @accept_only_homepage = accept_only_homepage
  @hostname = hostname
  @drop_refers_by_hostname = drop_refers_by_hostname
  @use_local_time = use_local_time
  @assigned_numbers = assigned_numbers
  @match_page_numbers = match_page_numbers
  @log_format = []
  initialize_known_formats
  # 'weekly' is passed positionally as Logger's shift_age; the previous
  # "shift_age = 'weekly'" form only created a stray local variable.
  # NOTE(review): the path is relative to the current working directory.
  @parselog = Logger.new('./log/parse.log', 'weekly')
  @parselog.level = Logger::WARN
end
142
+
143
+ # Processes the format string into symbols and test regex and saves using LogFormat class
144
def initialize_known_formats
  # Builds one LogFormat object per entry of LOG_FORMATS and keeps them in
  # @known_formats, keyed by format name.
  @known_formats = {}
  LOG_FORMATS.each_pair do |format_name, format_string|
    @known_formats[format_name] = Vizi::LogFormat.new(format_name, format_string)
  end
end
150
+
151
+ # Checks which standard the log file (well one line) is
152
+ # Automatically checks for most complex (longest) regex first ...
153
def check_format(line)
  # Returns the name of the first known format whose regex matches +line+,
  # trying the formats with the longest regex source first; returns :unknown
  # when none of them matches.
  by_regex_length = @known_formats.sort_by { |_name, candidate| candidate.format_regex.source.size }
  by_regex_length.reverse_each do |name, candidate|
    return name if line.match(candidate.format_regex)
  end
  :unknown
end
159
+
160
+ # Builds the format from the IIS fieldnames
161
def build_format(line)
  # Translates an IIS "#Fields: ..." line into a format string of
  # %-directives, one per field and each followed by a space.
  #
  # The first token of the line is skipped. The result is also stored in
  # @format. Raises ArgumentError when a field name has no entry in
  # FIELDNAMES (previously this surfaced as a TypeError from "%" + nil).
  @format = String.new
  line.split(' ').drop(1).each do |fieldname|
    directive = FIELDNAMES[fieldname]
    raise ArgumentError, "unsupported IIS log field: #{fieldname}" if directive.nil?
    @format << "%" << directive << " "
  end
  @format
end
171
+
172
+ # Match a partial string in field against an external field array
173
def match_partial(field, fldarray)
  # Returns the position in +field+ of the first entry of +fldarray+ that
  # occurs inside it, or nil when no entry occurs.
  #
  # A nil +field+ or nil +fldarray+ yields nil instead of raising, so callers
  # can pass values that were absent from the log line or the configuration.
  return nil if field.nil? || fldarray.nil?

  fldarray.each do |fragment|
    hit = field.index(fragment)
    return hit if hit
  end
  nil
end
183
+
184
+ # Find an assigned number from matching string against an external field array
185
def find_assigned_number(field, fldarray)
  # Looks for the first entry of +fldarray+ that contains +field+ and returns
  # the integer value of that entry's second comma-separated part.
  # Returns 0 when no entry contains +field+.
  matching_entry = fldarray.find { |entry| entry.index(field) }
  return 0 unless matching_entry

  matching_entry.split(',')[1].to_i
end
199
+
200
+ # apache files ... regex the file to determine logformat name
201
+ # IIS files ... parse the fields string to determine the file contents
202
def parse_line(line, logformat)
  # Parses one log line into a hash of field symbols plus derived :p_* keys.
  #
  # line      - the raw log line
  # logformat - a format object responding to format_symbols (IIS path: the
  #             line is split on whitespace), or nil (apache path: the format
  #             is detected with check_format and its regex extracts the data)
  #
  # Returns the parsed_data hash. Raises ArgumentError when no known format
  # matches and no logformat was supplied.
  if logformat.nil?
    @format_name = check_format(line)          # look for matching formats, check each time
    log_format = @known_formats[@format_name]  # found a matched format
    if log_format.nil? || line !~ log_format.format_regex
      raise ArgumentError, "log line does not match any known format"
    end
    data = line.downcase.scan(log_format.format_regex).flatten
  else
    log_format = logformat
    @format_name = "temp"
    data = line.split(' ')
  end

  # Load data for each format symbol, by position.
  parsed_data = {}
  log_format.format_symbols.each_with_index do |symbol, position|
    parsed_data[symbol] = data[position]
  end

  parsed_data[:p_logformatname] = @format_name.to_s
  parsed_data[:p_logformat] = logformat
  parsed_data[:p_visitortype] = "H"                                # default visitor type (H)uman
  parsed_data[:p_linetype] = parsed_data[:ip].nil? ? "C" : "V"     # (V)isitor, or (C)omment when no ip

  if @format_name.to_s == "w3c_f"               # IIS "#Fields:" line
    @format = build_format(line)                # parse fields to get log_format
    parsed_data[:p_logformat] = Vizi::LogFormat.new(:temp, @format)  # shuttle the log_format object
    parsed_data[:p_logformatname] = "iis"
    parsed_data[:p_linetype] = "F"              # linetype to (F)ield list
    parsed_data[:p_visitortype] = "-"           # visitor type not relevant
  elsif @format_name.to_s == "w3c_c"            # IIS comment line
    parsed_data[:p_linetype] = "C"
    parsed_data[:p_visitortype] = "-"
  elsif parsed_data[:p_linetype] == "C"
    @parselog.warn line
    @parselog.warn "Found comment lines embedded in the log file ... resetting to nil"
    parsed_data[:p_logformat] = nil
  else
    # IIS: separate date and time fields. Only the clock components of the
    # parsed time are kept and re-read as UTC.
    if parsed_data[:datestring]
      dt = Time.parse(parsed_data[:datestring] + " " + parsed_data[:timestring])
      parsed_data[:datetime] = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
      parsed_data[:datetime] = parsed_data[:datetime].getlocal if @use_local_time
    end

    # apache: one bracketed field; strip the brackets and turn the first ':'
    # into a space before parsing.
    if parsed_data[:dtstring]
      parsed_data[:dtstring] = parsed_data[:dtstring][1...-1]
      parsed_data[:dtstring] = parsed_data[:dtstring].sub(":", " ")
      dt = Time.parse(parsed_data[:dtstring])
      parsed_data[:datetime] = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
      parsed_data[:datetime] = parsed_data[:datetime].getlocal if @use_local_time
    end

    # apache: the request holds method and uri stem; keep the second token.
    if parsed_data[:request]
      splitrequest = parsed_data[:request].split(' ')
      parsed_data[:csuristem] = splitrequest[1]
    end

    # Now classify visitortype based on logger yml rules ...
    # A missing uri stem can never be a page, so it is guarded here instead
    # of crashing on nil.
    uristem = parsed_data[:csuristem]
    parsed_data[:p_pageflag] = false
    unless uristem.nil?
      if @accept_only_homepage
        parsed_data[:p_pageflag] = true if uristem.downcase.index(@homepage) == 0
      else
        parsed_data[:p_pageflag] = true if match_partial(uristem, @page_urls)
      end
      parsed_data[:p_pageflag] = false if @hide_urls and match_partial(uristem, @hide_urls)
    end

    parsed_data[:p_visitortype] = "D" if @drops and @drops.index(parsed_data[:ip])
    parsed_data[:p_visitortype] = "S" if @sips and @sips.index(parsed_data[:ip])

    if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent], @snames)
      parsed_data[:p_visitortype] = "S"
    end

    if parsed_data[:referer]
      # The phrase ends at the next '&' or at the end of the referer, so a
      # phrase that is the last query parameter is captured too.
      y = /(search\?\S*?[pq])=(\S*?)(?:&|$)/.match(parsed_data[:referer])
      parsed_data[:p_searchphrase] = y[2] if y != nil
      if @drop_refers_by_hostname
        parsed_data[:p_visitortype] = "D" if parsed_data[:referer].index(@hostname) != nil
      end
    end

    if @match_page_numbers and parsed_data[:p_pageflag]
      parsed_data[:p_pageid] = find_assigned_number(parsed_data[:csuristem], @assigned_numbers)
    end
  end
  parsed_data
end
294
+ end
295
+
296
+ # This class creates and stores information related to each visit
297
+ # Visits are determined on the basis of the IP Address hits during a timed interval
298
+ #
299
class Visit
  # Holds the running totals for one visit (one IP address within a timed
  # interval): start/end/expiry times, hit and page counts, visitor type,
  # search phrase, rank and the page ids seen.
  #
  # The class variables @@visit_timeout and @@download_page_number are read
  # here but not assigned anywhere in this class.
  # NOTE(review): they are presumably set by the application before the first
  # Visit is created — confirm against the caller.
  attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pages, :robots, :visitortype, :searchphrase
  attr_reader :rank, :pageids, :download_file

  # Calculates the rank from page count and duration (in seconds).
  #
  # Pages (capped at 9) count ten times as much as whole minutes (capped at
  # 9). A rank of 0 is lifted to 1; the rank is negated for visitortype "S"
  # and forced to 0 for visitortype "D".
  def calculate_rank(pages, duration, visitortype)
    ranktotal = [pages, 9].min * 10 + [duration / 60, 9].min
    rank = ((ranktotal + 10) / 20).round
    rank = 1 if rank == 0
    rank = -rank if visitortype == "S"
    rank = 0 if visitortype == "D"
    rank
  end

  # Extracts the name of a downloaded file from the csuriquery value.
  #
  # Returns the text after "file=" when timetaken is above 4000, else nil.
  # The value is also printed with +p+, as before.
  def get_download(csuriquery, timetaken)
    download = nil
    if timetaken.to_i > 4000
      download = csuriquery.split("file=")[1]
      p download
    end
    download
  end

  # Shared tail of initialize and update: refreshes search phrase and rank
  # and records the page id of the current hit.
  #
  # The page id list is created once and then appended to, so ids from
  # earlier hits of the same visit are kept (it used to be reset on every
  # call, which left only the id of the latest hit).
  def add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
    @searchphrase = p_searchphrase if p_searchphrase
    @rank = calculate_rank(@pages, @duration, @visitortype)
    @pageids ||= []
    if p_pageid
      @pageids << p_pageid
    else
      # No assigned page number: fall back to a PageID=<n> query parameter.
      z = /(PageID)=(\d+)/.match(csuriquery)
      if z
        p_pageid = z[2].to_i
        @pageids << p_pageid
        @download_file = get_download(csuriquery, timetaken) if p_pageid == @@download_page_number
      end
    end
  end

  # Initializes the Visit object from the first parsed hit.
  def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @ip = ip
    @start_dt = log_dt
    @expire_dt = @start_dt + @@visit_timeout
    @end_dt = @start_dt
    @duration = 0
    @hits = 0
    @pages = p_pageflag ? 1 : 0
    @visitortype = p_visitortype
    @visitortype = "S" if csuristem == "/robots.txt"   # a robots.txt request marks the visitor as type "S"
    @searchphrase = ""
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # Updates the Visit object with a further parsed hit.
  def update(end_dt, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
    @end_dt = end_dt
    @duration = (@end_dt - @start_dt).to_i
    @hits += 1
    @pages += 1 if p_pageflag
    # Only a visit still classified as "H" takes over the new hit's type.
    @visitortype = p_visitortype if @visitortype == "H"
    add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
  end

  # Prints a one-line summary of the visit, then the search phrase and the
  # page ids when present.
  def sendoutput
    iplong = @ip.to_s+" "
    p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@visitortype+" Hits> "+@hits.to_s+" Pgs> "+@pages.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
    p" Phrase> "+@searchphrase if @searchphrase.length > 0
    p @pageids if @pageids.length > 0
  end
end
375
+
376
+ # This class creates and manages a list to keep track of the visits that are in process (cached)
377
+ # Once a visit reaches the time interval, an output transaction is generated and the visit is removed from the list
378
+ #
379
class VisitList
  # Keeps the visits that are currently in progress in a plain array.

  # Starts with an empty list.
  def initialize
    @visits = []
  end

  # Adds +visit+ to the end of the list and returns the list object itself.
  def append(visit)
    @visits << visit
    self
  end

  # Removes +visit+ from the list (Array#delete semantics).
  def delete(visit)
    @visits.delete(visit)
  end

  # Returns the underlying array of visits.
  def find_all
    @visits
  end

  # Returns the first visit whose ip equals +ip+, or nil.
  def find_by_ip(ip)
    @visits.detect { |candidate| ip == candidate.ip }
  end

  # Returns the first visit whose expire_dt lies before +test_dt+, or nil.
  def find_expired(test_dt)
    @visits.detect { |candidate| candidate.expire_dt < test_dt }
  end
end
405
+
406
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+ # module ViziTracker # :doc:
3
+ ## require 'vizi_tracker'
4
# Resolve the implementation relative to this file rather than the current
# working directory, so the gem loads from any directory.
require_relative 'vizi/vizi_tracker'
5
+ # end
data/log/parse.log ADDED
@@ -0,0 +1,79 @@
1
+ # Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
2
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
3
+
4
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
6
+
7
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
9
+
10
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
12
+
13
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
+ W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
15
+
16
+ W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- :
18
+
19
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
+
22
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
+
25
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
26
+ W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
27
+
28
+ W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
29
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
30
+
31
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
32
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
33
+
34
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
35
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
36
+
37
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
38
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
39
+
40
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
41
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
42
+
43
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
44
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
45
+
46
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
47
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
48
+
49
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
50
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
51
+
52
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
53
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
54
+
55
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
56
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
57
+
58
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
59
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
60
+
61
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
62
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
63
+
64
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
65
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
66
+
67
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
68
+ W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
69
+
70
+ W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
71
+ W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
72
+
73
+ W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
74
+ W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
75
+
76
+ W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
77
+ W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
78
+
79
+ W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil