vizi_tracker 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/config/logger.yml +80 -42
- data/lib/vizi/vizi_tracker.rb +243 -110
- data/log/parse.log +21 -75
- data/log/parse.log.20120428 +79 -0
- data/log/system.log +12 -66
- data/log/system.log.20120428 +66 -0
- data/testit.rb +16 -17
- data/vizi_tracker-0.3.0.gem +0 -0
- data/vizi_tracker.gemspec +2 -2
- metadata +10 -7
data/config/logger.yml
CHANGED
@@ -1,49 +1,39 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# basic configuration data ...................................
|
2
|
+
max_rec_count: 99999
|
3
|
+
sub_directory: data/
|
3
4
|
visit_timeout: 1200
|
4
|
-
summary_flag: true
|
5
|
-
convert_to_lower_case: true
|
6
|
-
log_level: warn
|
7
5
|
#log_level options are debug, info, warn, error, fatal
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
-
|
16
|
-
-
|
17
|
-
|
18
|
-
-
|
19
|
-
|
20
|
-
- chart
|
21
|
-
- robots
|
6
|
+
log_level: info
|
7
|
+
log_record_counts: true
|
8
|
+
convert_to_lower_case: true
|
9
|
+
url_stem: /
|
10
|
+
accept_only_url_stem: false
|
11
|
+
include_urls:
|
12
|
+
- .htm
|
13
|
+
- .pdf
|
14
|
+
- home.aspx
|
15
|
+
exclude_urls:
|
16
|
+
- .html
|
17
|
+
# sitemap_match: true
|
22
18
|
hostname: www.sigma-systems.com
|
23
|
-
drop_refers_by_hostname:
|
19
|
+
drop_refers_by_hostname: false
|
20
|
+
# output_where_rank_over: 2
|
24
21
|
use_local_time: true
|
25
|
-
download_page_number: 45
|
26
22
|
drop_ips:
|
23
|
+
- 10.131.0.4
|
24
|
+
- 76.12.171.4
|
27
25
|
- 76.12.185.100
|
28
26
|
spider_ips:
|
29
27
|
- 66.98.254.55
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
- 202.108.22.132
|
35
|
-
- 89.122.29.77
|
36
|
-
- 95.174.93.222
|
37
|
-
- 66.55.37.179
|
38
|
-
- 198.45.18.20
|
39
|
-
- 38.104.227.3
|
28
|
+
# post processing instructions ..............................
|
29
|
+
output_where_rank_over: 2
|
30
|
+
sitemap_match: true
|
31
|
+
# spider and user agents ....................................
|
40
32
|
spider_names:
|
41
33
|
- bot
|
42
34
|
- spider
|
43
35
|
- slurp
|
44
36
|
- root.exe
|
45
|
-
- .dll
|
46
|
-
- slurp
|
47
37
|
- looksmart
|
48
38
|
- nutchsvc
|
49
39
|
- iconsurf
|
@@ -55,15 +45,63 @@ spider_names:
|
|
55
45
|
- konsqueror
|
56
46
|
- crawler
|
57
47
|
- searchme
|
58
|
-
-
|
48
|
+
- findlinks
|
59
49
|
- scoutjet
|
60
50
|
- yeti
|
61
51
|
- yandex
|
62
|
-
|
63
|
-
|
64
|
-
-
|
65
|
-
-
|
66
|
-
-
|
67
|
-
-
|
68
|
-
-
|
69
|
-
|
52
|
+
- fetch
|
53
|
+
- nutch
|
54
|
+
- sleuth
|
55
|
+
- globalspec
|
56
|
+
- openurl
|
57
|
+
- python
|
58
|
+
- acoon
|
59
|
+
- itim
|
60
|
+
- metauri
|
61
|
+
- extractor
|
62
|
+
- exractor
|
63
|
+
- pagegetter
|
64
|
+
- linkdex
|
65
|
+
- website
|
66
|
+
- xfruits
|
67
|
+
- binlar
|
68
|
+
- abacus
|
69
|
+
- anemone
|
70
|
+
- ichiro
|
71
|
+
- scraping
|
72
|
+
- xpymep
|
73
|
+
- swish
|
74
|
+
- sitemaps
|
75
|
+
- yahoocachesystem
|
76
|
+
- sexy
|
77
|
+
- .dll
|
78
|
+
- java/1
|
79
|
+
usual_agents:
|
80
|
+
- mozilla
|
81
|
+
- opera
|
82
|
+
- chrome
|
83
|
+
- lynx
|
84
|
+
- nokia
|
85
|
+
- blackberry
|
86
|
+
- sharp
|
87
|
+
- docomo
|
88
|
+
- curl
|
89
|
+
- facebook
|
90
|
+
- microsoft
|
91
|
+
- securepoint
|
92
|
+
- wordpress
|
93
|
+
# advanced: db lookups and csv ..................................
|
94
|
+
whois_lookup: true
|
95
|
+
use_db_lookup: true
|
96
|
+
use_db_visits: true
|
97
|
+
# create_csv_file: false
|
98
|
+
# advanced: email notification ..................................
|
99
|
+
admin_email: ...
|
100
|
+
user_email: ..
|
101
|
+
email_server: ...
|
102
|
+
send_logmsg_admin: ...
|
103
|
+
# advanced: google docs connection ..............................
|
104
|
+
use_gdocs: ...
|
105
|
+
gdocs_user: ...
|
106
|
+
gdocs_password: ...
|
107
|
+
gdocs_key: ...
|
data/lib/vizi/vizi_tracker.rb
CHANGED
@@ -18,30 +18,30 @@ module Vizi
|
|
18
18
|
# format string char => [:symbol to use, /regex to use when matching against log/]
|
19
19
|
'h' => [:ip, /\d+\.\d+\.\d+\.\d+/], # apache and IIS: called c-ip in IIS
|
20
20
|
'p' => [:sip, /\d+\.\d+\.\d+\.\d+/], # IIS:
|
21
|
-
'g' => [:auth, /\S*/],
|
22
|
-
'u' => [:username, /\S*/],
|
23
|
-
't' => [:dtstring, /\[.*?\]/],
|
21
|
+
'g' => [:auth, /\S*/], # apache:
|
22
|
+
'u' => [:username, /\S*/], # apache and IIS: called cs-username in IIS
|
23
|
+
't' => [:dtstring, /\[.*?\]/], # apache: one field with date and time
|
24
24
|
'd' => [:datestring, /\d+\-\d+\-\d+/], # IIS:
|
25
25
|
'e' => [:timestring, /\d+\:\d+\:\d+/], # IIS:
|
26
|
-
'r' => [:request, /.*?/],
|
27
|
-
'm' => [:csmethod, /\w*?/],
|
28
|
-
'w' => [:csuristem, /\S*/],
|
29
|
-
's' => [:status, /\d+/],
|
30
|
-
'b' => [:bytecount, /-|\d+/],
|
31
|
-
'v' => [:domain, /.*?/],
|
32
|
-
'i' => [:header_lines, /.*?/],
|
33
|
-
'a' => [:useragent, /\S*/],
|
34
|
-
'j' => [:referer, /\S*/],
|
35
|
-
'k' => [:cscookie, /\d+/],
|
36
|
-
'q' => [:csuriquery, /.*/],
|
37
|
-
'y' => [:csbytes, /d+/],
|
38
|
-
'o' => [:sport, /\d+/],
|
39
|
-
'x' => [:scsubstatus, /\d+/],
|
40
|
-
'z' => [:cshost, /\d+/],
|
41
|
-
'l' => [:win32status, /\d+/],
|
42
|
-
'n' => [:timetaken, /\d+/],
|
43
|
-
'c' => [:comment, /^#/],
|
44
|
-
'f' => [:fields, /^#Fields:/]
|
26
|
+
'r' => [:request, /.*?/], # apache: includes both csmethod and csuristem
|
27
|
+
'm' => [:csmethod, /\w*?/], # IIS:
|
28
|
+
'w' => [:csuristem, /\S*/], # IIS:
|
29
|
+
's' => [:status, /\d+/], # apache and IIS: is called sc_status in IIS
|
30
|
+
'b' => [:bytecount, /-|\d+/], # apache and IIS: is called cs_bytes in IIS
|
31
|
+
'v' => [:domain, /.*?/], # apache and IIS: is c-computername in IIS
|
32
|
+
'i' => [:header_lines, /.*?/], # apache: transforms to useragent or referer or cookies
|
33
|
+
'a' => [:useragent, /\S*/], # IIS:
|
34
|
+
'j' => [:referer, /\S*/], # IIS:
|
35
|
+
'k' => [:cscookie, /\d+/], # IIS:
|
36
|
+
'q' => [:csuriquery, /.*/], # IIS:
|
37
|
+
'y' => [:csbytes, /d+/], # IIS:
|
38
|
+
'o' => [:sport, /\d+/], # IIS:
|
39
|
+
'x' => [:scsubstatus, /\d+/], # IIS:
|
40
|
+
'z' => [:cshost, /\d+/], # IIS:
|
41
|
+
'l' => [:win32status, /\d+/], # IIS:
|
42
|
+
'n' => [:timetaken, /\d+/], # IIS:
|
43
|
+
'c' => [:comment, /^#/], # IIS: comment line identifier
|
44
|
+
'f' => [:fields, /^#Fields:/] # IIS: field line identifier
|
45
45
|
}
|
46
46
|
|
47
47
|
# This method initializes the LogFormat object with fieldnames and log formats
|
@@ -106,7 +106,7 @@ module Vizi
|
|
106
106
|
'cs-uri-query' => 'q',
|
107
107
|
'cs(Referer)' => 'j', # internal shortened to referer
|
108
108
|
'cs(User-Agent)' => 'a', # internal shortened to useragent
|
109
|
-
'cs(Cookie)' => 'k',
|
109
|
+
'cs(Cookie)' => 'k', # internal shortened to cscookie
|
110
110
|
's-port' => 'o',
|
111
111
|
'cs-host' => 'z',
|
112
112
|
'sc-substatus' => 'x',
|
@@ -120,20 +120,19 @@ module Vizi
|
|
120
120
|
#@@log = ActiveRecord::Base.logger
|
121
121
|
|
122
122
|
# This method initialises LogParser object and loads the configurable logger control items
|
123
|
-
def initialize(drop_ips, spider_ips, spider_names,
|
124
|
-
hostname, drop_refers_by_hostname,
|
123
|
+
def initialize(drop_ips, spider_ips, spider_names, include_urls, exclude_urls, url_stem, accept_only_url_stem,
|
124
|
+
hostname, drop_refers_by_hostname, usualagents, use_local_time)
|
125
125
|
@drops = drop_ips
|
126
126
|
@sips = spider_ips
|
127
127
|
@snames = spider_names
|
128
|
-
@
|
129
|
-
@
|
130
|
-
@
|
131
|
-
@
|
128
|
+
@include_urls = include_urls
|
129
|
+
@exclude_urls = exclude_urls
|
130
|
+
@url_stem = url_stem
|
131
|
+
@accept_only_url_stem = accept_only_url_stem
|
132
132
|
@hostname = hostname
|
133
133
|
@drop_refers_by_hostname = drop_refers_by_hostname
|
134
|
+
@usualagents = usualagents
|
134
135
|
@use_local_time = use_local_time
|
135
|
-
@assigned_numbers = assigned_numbers
|
136
|
-
@match_page_numbers = match_page_numbers
|
137
136
|
@log_format = []
|
138
137
|
initialize_known_formats
|
139
138
|
@parselog = Logger.new('./log/parse.log', shift_age = 'weekly')
|
@@ -174,7 +173,7 @@ module Vizi
|
|
174
173
|
hit = nil
|
175
174
|
i = 0
|
176
175
|
while i < fldarray.length
|
177
|
-
|
176
|
+
hit = field.index(fldarray[i])
|
178
177
|
break if hit
|
179
178
|
i = i + 1
|
180
179
|
end
|
@@ -199,6 +198,9 @@ module Vizi
|
|
199
198
|
|
200
199
|
# apache files ... regex the file to determine logformat name
|
201
200
|
# IIS files ... parse the fields string to determine the file contents
|
201
|
+
# :p_linetype ... line is a (C)ontrol line, (F)ield line or a good (V)isitor line
|
202
|
+
# :p_pageflag ... (Y)es is a valid page or (N)ot
|
203
|
+
# :p_visitortype ... (H)uman, (S)pider, (D)ropped or (-) Not relevant
|
202
204
|
def parse_line(line, logformat)
|
203
205
|
if logformat != nil
|
204
206
|
log_format = logformat # get log_format string
|
@@ -219,7 +221,7 @@ module Vizi
|
|
219
221
|
parsed_data[:p_logformat] = logformat
|
220
222
|
parsed_data[:p_visitortype] = "H" # set default visitor type (H)uman
|
221
223
|
parsed_data[:p_linetype] = "V" # linetype is (V)isitors
|
222
|
-
parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # reset if a comment line
|
224
|
+
parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # reset if a comment line
|
223
225
|
if @format_name.to_s == "w3c_f" # IIS file name ... generic
|
224
226
|
@format = build_format(line) # parse fields to get log_format
|
225
227
|
temp_format = Vizi::LogFormat.new(:temp, @format) # create temp format
|
@@ -252,28 +254,39 @@ module Vizi
|
|
252
254
|
|
253
255
|
if parsed_data[:request]
|
254
256
|
# splitrequest = parsed_data[:request].gsub("/", " ").split
|
255
|
-
|
257
|
+
splitrequest = parsed_data[:request].split(' ')
|
256
258
|
parsed_data[:csuristem] = splitrequest[1]
|
257
259
|
end
|
258
260
|
|
259
|
-
# Now
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
#p @homepage
|
264
|
-
#p parsed_data[:csuristem]
|
265
|
-
parsed_data[:p_pageflag] = true if parsed_data[:csuristem].downcase.index(@homepage) == 0
|
261
|
+
# Now determine visitortype based on logger yml rules ...
|
262
|
+
parsed_data[:p_pageflag] = "N"
|
263
|
+
if @accept_only_url_stem # indicates that url_stem must always appear at start of csuristem
|
264
|
+
parsed_data[:p_pageflag] = "Y" if parsed_data[:csuristem].downcase.index(@url_stem) == 0
|
266
265
|
else
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
266
|
+
if parsed_data[:csuristem].downcase == @url_stem
|
267
|
+
parsed_data[:p_pageflag] = "Y"
|
268
|
+
else
|
269
|
+
if @include_urls
|
270
|
+
parsed_data[:p_pageflag] = "Y" if match_partial(parsed_data[:csuristem].downcase, @include_urls)
|
271
|
+
end
|
272
|
+
if @exclude_urls
|
273
|
+
parsed_data[:p_pageflag] = "N" if match_partial(parsed_data[:csuristem].downcase, @exclude_urls)
|
274
|
+
end
|
275
|
+
end
|
276
|
+
end
|
277
|
+
|
278
|
+
parsed_data[:p_visitortype] = "D" if parsed_data[:status] == "404"
|
271
279
|
parsed_data[:p_visitortype] = "D" if @drops and @drops.index(parsed_data[:ip])
|
272
|
-
parsed_data[:p_visitortype] = "S" if @sips and@sips.index(parsed_data[:ip])
|
273
|
-
|
274
|
-
if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent], @snames)
|
280
|
+
parsed_data[:p_visitortype] = "S" if @sips and @sips.index(parsed_data[:ip])
|
281
|
+
if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent].downcase, @snames)
|
275
282
|
parsed_data[:p_visitortype] = "S"
|
276
283
|
end
|
284
|
+
parsed_data[:p_visitortype] = "S" if parsed_data[:useragent] == "-"
|
285
|
+
parsed_data[:p_usualagent] = "Y"
|
286
|
+
parsed_data[:p_usualagent] = "N" if parsed_data[:p_visitortype] != "S" and not match_partial(parsed_data[:useragent].downcase, @usualagents)
|
287
|
+
|
288
|
+
parsed_data[:p_returnhit] = "N"
|
289
|
+
parsed_data[:p_returnhit] = "Y" if parsed_data[:status] == "304"
|
277
290
|
|
278
291
|
if parsed_data[:referer]
|
279
292
|
y = (/(search\?\S*?[pq])=(\S*?)(&)/).match(parsed_data[:referer])
|
@@ -283,11 +296,8 @@ module Vizi
|
|
283
296
|
end
|
284
297
|
end
|
285
298
|
|
286
|
-
|
287
|
-
|
288
|
-
# p ">>" + parsed_data[:p_pageid].to_s if parsed_data[:p_pageid]
|
289
|
-
end
|
290
|
-
|
299
|
+
parsed_data[:p_pdfstem] = nil
|
300
|
+
parsed_data[:p_pdfstem] = parsed_data[:csuristem].downcase if parsed_data[:csuristem].downcase.index("/pdfs/") == 0
|
291
301
|
end
|
292
302
|
parsed_data
|
293
303
|
end
|
@@ -297,80 +307,203 @@ module Vizi
|
|
297
307
|
# Visits are determined on the basis of the IP Address hits during a timed interval
|
298
308
|
#
|
299
309
|
class Visit
|
300
|
-
attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :
|
301
|
-
|
302
|
-
# This method calculates the rank
|
303
|
-
def calculate_rank(pages, duration, visitortype)
|
304
|
-
ranktotal = [pages,9].min*10 + [duration/60,9].min
|
305
|
-
rank = ((ranktotal+10)/20).round
|
306
|
-
rank = 1 if rank == 0
|
307
|
-
rank = -rank if visitortype == "S"
|
308
|
-
rank = 0 if visitortype == "D"
|
309
|
-
return rank
|
310
|
-
end
|
311
|
-
|
312
|
-
# This method extracts the name of a downloaded file from the csuriquery value
|
313
|
-
def get_download(csuriquery, timetaken)
|
314
|
-
download = nil
|
315
|
-
if timetaken.to_i > 4000
|
316
|
-
split_uri = csuriquery.split("file=")
|
317
|
-
download = split_uri[1]
|
318
|
-
p download
|
319
|
-
end
|
320
|
-
return download
|
321
|
-
end
|
310
|
+
attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pgcount, :robots, :vtype,
|
311
|
+
:returnhit, :searchphrase, :orgname, :city, :country, :region, :grouphash, :group, :groupcount, :pdfstem, :pdflist
|
322
312
|
|
323
|
-
#
|
324
|
-
|
325
|
-
@searchphrase = p_searchphrase if p_searchphrase
|
326
|
-
@rank = calculate_rank(@pages, @duration, @visitortype)
|
327
|
-
@pageids = []
|
328
|
-
if p_pageid
|
329
|
-
@pageids << p_pageid
|
330
|
-
else
|
331
|
-
z=(/(PageID)=(\d+)/).match(csuriquery)
|
332
|
-
if z
|
333
|
-
p_pageid = z[2].to_i
|
334
|
-
@pageids << p_pageid
|
335
|
-
@download_file = get_download(csuriquery, timetaken) if p_pageid == @@download_page_number
|
336
|
-
end
|
337
|
-
end
|
338
|
-
end
|
339
|
-
|
340
|
-
# This method initializes the Visit object. Load object with parsed data
|
341
|
-
def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
|
313
|
+
# This method initializes the Visit object. Loads object with parsed data from first captured line
|
314
|
+
def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_returnhit, p_pdfstem, visit_timeout)
|
342
315
|
@ip = ip
|
343
316
|
@start_dt = log_dt
|
344
|
-
@expire_dt = @start_dt +
|
317
|
+
@expire_dt = @start_dt + visit_timeout
|
345
318
|
@end_dt = @start_dt
|
346
319
|
@duration = 0
|
347
320
|
@hits = 0
|
348
|
-
@
|
349
|
-
@
|
350
|
-
@
|
351
|
-
@
|
321
|
+
@pgcount = 0
|
322
|
+
@pgcount = 1 if p_pageflag == "Y"
|
323
|
+
@vtype = p_visitortype
|
324
|
+
@vtype = "S" if csuristem == "/robots.txt"
|
325
|
+
@returnhit = p_returnhit
|
326
|
+
@orgname = ""
|
327
|
+
@city = ""
|
328
|
+
@country = ""
|
329
|
+
@region = ""
|
330
|
+
@grouphash = Hash.new
|
331
|
+
@group = ""
|
332
|
+
@groupcount = 0
|
333
|
+
@orgmatch = ""
|
352
334
|
@searchphrase = ""
|
353
|
-
|
335
|
+
@pdfstem = p_pdfstem
|
336
|
+
@pdflist = Array.new
|
337
|
+
@pdflist << @pdfstem if not @pdfstem.nil?
|
338
|
+
@rank = calculate_rank(@pgcount, @duration, @vtype, @pdflist.length)
|
339
|
+
end
|
340
|
+
|
341
|
+
# This method calculates the rank
|
342
|
+
def calculate_rank(pgcount, duration, visitortype, pdfhits)
|
343
|
+
if pgcount < 4
|
344
|
+
rank = pgcount
|
345
|
+
elsif pgcount > 10
|
346
|
+
rank = 5
|
347
|
+
else
|
348
|
+
rank = 4
|
349
|
+
end
|
350
|
+
rank = 2 if duration < 21
|
351
|
+
rank = 1 if duration < 11
|
352
|
+
rank = 0 if duration < 11 and pgcount > 40
|
353
|
+
rank = 0 if pgcount > duration/5
|
354
|
+
rank = 0 if duration == 0
|
355
|
+
rank = 0 if visitortype == "D"
|
356
|
+
rank = rank + 1 if pdfhits > 0
|
357
|
+
rank = 5 if rank > 5
|
358
|
+
rank = -rank if visitortype == "S"
|
359
|
+
return rank
|
354
360
|
end
|
355
361
|
|
356
362
|
# This method updates the Visit object with new parsed data
|
357
|
-
def update(end_dt,
|
363
|
+
def update(end_dt, p_visitortype, p_pageflag, p_returnhit, p_pdfstem)
|
358
364
|
@end_dt = end_dt
|
359
365
|
@duration = (@end_dt - @start_dt).to_i
|
360
366
|
@hits = @hits + 1
|
361
|
-
@
|
362
|
-
@
|
363
|
-
|
367
|
+
@pgcount = @pgcount + 1 if p_pageflag == "Y"
|
368
|
+
@vtype = p_visitortype if @vtype == "H"
|
369
|
+
@returnhit = p_returnhit if @returnhit == "N"
|
370
|
+
@pdfstem = p_pdfstem
|
371
|
+
@pdflist << @pdfstem if @pdfstem and @pdflist.index(@pdfstem).nil?
|
372
|
+
@rank = calculate_rank(@pgcount, @duration, @vtype, @pdflist.length)
|
373
|
+
end
|
374
|
+
|
375
|
+
# This method updates the Visit object with results of the whois lookup
|
376
|
+
def add_details(orgname, city, country, region)
|
377
|
+
@orgname = orgname
|
378
|
+
@city = city
|
379
|
+
@country = country
|
380
|
+
@region = region
|
381
|
+
end
|
382
|
+
|
383
|
+
def getip
|
384
|
+
@ip
|
385
|
+
end
|
386
|
+
|
387
|
+
# Get rank from object
|
388
|
+
def getrank
|
389
|
+
@rank
|
390
|
+
end
|
391
|
+
|
392
|
+
# Add count to group
|
393
|
+
def increment_group(group)
|
394
|
+
@grouphash[group] = @grouphash[group].to_i + 1
|
395
|
+
end
|
396
|
+
|
397
|
+
# Classify the visit based on various factors
|
398
|
+
def classify_visit
|
399
|
+
@group = "none"
|
400
|
+
@groupcount = 0
|
401
|
+
if @grouphash.length > 0
|
402
|
+
z = @grouphash.invert.sort
|
403
|
+
zlast = z[z.length-1]
|
404
|
+
@group = zlast[1]
|
405
|
+
@groupcount = z.length
|
406
|
+
end
|
407
|
+
case @group
|
408
|
+
when "news", "company", "resources"
|
409
|
+
@persona = "Analyst"
|
410
|
+
when "home", "contacts"
|
411
|
+
@persona = "Tirekicker"
|
412
|
+
when "products", "solutions"
|
413
|
+
@persona = "Suspect"
|
414
|
+
when "careers"
|
415
|
+
@persona = "Jobhunter"
|
416
|
+
when "evolve"
|
417
|
+
@persona = "Prospect"
|
418
|
+
when "partners"
|
419
|
+
@persona = "Barney"
|
420
|
+
when "customers"
|
421
|
+
@persona = "Poacher"
|
422
|
+
else
|
423
|
+
@persona = "None"
|
424
|
+
end
|
425
|
+
@persona = "Bouncer" if @rank < 3
|
426
|
+
@persona = "Prospect" if @persona == "Suspect" and ((@rank == 4 and @returnhit == "Y") or @rank == 5)
|
364
427
|
end
|
365
428
|
|
429
|
+
# This method looks to match the orgname against the orgs file
|
430
|
+
def matchorg(orgs)
|
431
|
+
@orgmatch = ""
|
432
|
+
orgs.each {|group, names|
|
433
|
+
names.each { |n|
|
434
|
+
if @orgname.index(n)
|
435
|
+
@orgmatch = group
|
436
|
+
break
|
437
|
+
end
|
438
|
+
}
|
439
|
+
}
|
440
|
+
end
|
441
|
+
|
442
|
+
# Print short output with key fields from the object
|
366
443
|
def sendoutput
|
367
|
-
|
368
|
-
iplong
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
444
|
+
iplong = @ip+" "
|
445
|
+
p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@vtype+" Pgs> "+@pgcount.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
|
446
|
+
end
|
447
|
+
|
448
|
+
# Print long output with key fields from the object
|
449
|
+
def printoutput
|
450
|
+
iplong = @ip+" "
|
451
|
+
p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@vtype+" Pgs> "+@pgcount.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s+" Org> "+@orgname+" City> "+@city+" Country> "+@country+" Region> "+@region
|
452
|
+
end
|
453
|
+
|
454
|
+
#def createcsvheader(fileout)
|
455
|
+
#fileout.puts("ipaddress, date, time, vtype, pgcount, duration, rank, returnhit, orgname, city, country, region")
|
456
|
+
#end
|
457
|
+
|
458
|
+
#def createcsvoutput(fileout)
|
459
|
+
#iplong = @ip+" "
|
460
|
+
#fileout.puts(iplong[0..14]+","+@start_dt.to_s[0..10]+","+@start_dt.to_s[11..18]+","+@vtype+","+@pgcount.to_s+","+@duration.to_s+","+@rank.to_s+","+@returnhit+","+@orgname+","+@city+","+@country+","+@region)
|
461
|
+
#end
|
462
|
+
|
463
|
+
# Store output to Google docs spreadsheet
|
464
|
+
def gdocsoutput (ws, row_count)
|
465
|
+
r = row_count+2
|
466
|
+
ws[r,1] = @ip
|
467
|
+
ws[r,2] = @start_dt.strftime("%m/%d/%Y")
|
468
|
+
ws[r,3] = @start_dt.strftime("%I:%M%p")
|
469
|
+
ws[r,4] = @pgcount
|
470
|
+
ws[r,5] = @duration
|
471
|
+
ws[r,6] = (@pdflist.length)
|
472
|
+
ws[r,7] = @rank
|
473
|
+
ws[r,8] = @orgname
|
474
|
+
ws[r,9] = @city
|
475
|
+
ws[r,10] = @country
|
476
|
+
ws[r,11] = @region
|
477
|
+
ws[r,12] = @returnhit
|
478
|
+
ws[r,13] = @persona
|
479
|
+
ws[r,14] = @group
|
480
|
+
ws[r,15] = @groupcount
|
481
|
+
ws[r,16] = @orgmatch
|
482
|
+
ws.save()
|
373
483
|
end
|
484
|
+
|
485
|
+
# Save output to database file
|
486
|
+
def saveoutput
|
487
|
+
@vzvisit = Vzvisit.new
|
488
|
+
@vzvisit[:ipaddr] = @ip
|
489
|
+
@vzvisit[:vdatetime] = @start_dt
|
490
|
+
@vzvisit[:vtype] = @vtype
|
491
|
+
@vzvisit[:pgcount] = @pgcount
|
492
|
+
@vzvisit[:duration] = @duration
|
493
|
+
@vzvisit[:rank] = @rank
|
494
|
+
@vzvisit[:orgname] = @orgname
|
495
|
+
@vzvisit[:city] = @city
|
496
|
+
@vzvisit[:country] = @country
|
497
|
+
@vzvisit[:region] = @region
|
498
|
+
@vzvisit[:returnhit] = @returnhit
|
499
|
+
@vzvisit[:group] = @group
|
500
|
+
@vzvisit[:groupcount] = @groupcount
|
501
|
+
@vzvisit[:persona] = @persona
|
502
|
+
@vzvisit[:orgmatch] = @orgmatch
|
503
|
+
@vzvisit[:pdfhits] = @pdflist.length
|
504
|
+
@vzvisit.save
|
505
|
+
end
|
506
|
+
|
374
507
|
end
|
375
508
|
|
376
509
|
# This class creates and manages a list to keep track of the visits that are in process (cached)
|
data/log/parse.log
CHANGED
@@ -1,79 +1,25 @@
|
|
1
|
-
# Logfile created on
|
2
|
-
W, [
|
3
|
-
|
4
|
-
W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
5
|
-
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
6
|
-
|
7
|
-
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
8
|
-
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
9
|
-
|
10
|
-
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
11
|
-
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
12
|
-
|
13
|
-
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
14
|
-
W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
|
15
|
-
|
16
|
-
W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
17
|
-
W, [2011-06-05T22:08:53.528629 #560] WARN -- :
|
18
|
-
|
19
|
-
W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
20
|
-
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
21
|
-
|
22
|
-
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
23
|
-
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
24
|
-
|
25
|
-
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
26
|
-
W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
27
|
-
|
28
|
-
W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
29
|
-
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
30
|
-
|
31
|
-
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
32
|
-
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
33
|
-
|
34
|
-
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
35
|
-
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
36
|
-
|
37
|
-
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
38
|
-
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
39
|
-
|
40
|
-
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
41
|
-
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
42
|
-
|
43
|
-
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
44
|
-
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
45
|
-
|
46
|
-
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
47
|
-
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
48
|
-
|
49
|
-
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
50
|
-
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
51
|
-
|
52
|
-
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
53
|
-
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
54
|
-
|
55
|
-
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
56
|
-
W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
57
|
-
|
58
|
-
W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
59
|
-
W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
60
|
-
|
61
|
-
W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
62
|
-
W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
63
|
-
|
64
|
-
W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
65
|
-
W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
66
|
-
|
67
|
-
W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
68
|
-
W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
1
|
+
# Logfile created on Sat May 05 17:30:06 -0400 2012 by logger.rb/22285
|
2
|
+
W, [2012-05-05T17:30:06.758356 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
69
3
|
|
70
|
-
W, [
|
71
|
-
W, [
|
4
|
+
W, [2012-05-05T17:30:06.758765 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
5
|
+
W, [2012-05-05T17:30:08.532729 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
72
6
|
|
73
|
-
W, [
|
74
|
-
W, [
|
7
|
+
W, [2012-05-05T17:30:08.532918 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
8
|
+
W, [2012-05-05T17:30:09.348402 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
75
9
|
|
76
|
-
W, [
|
77
|
-
W, [
|
10
|
+
W, [2012-05-05T17:30:09.348559 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
11
|
+
W, [2012-05-05T17:30:10.385291 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
78
12
|
|
79
|
-
W, [
|
13
|
+
W, [2012-05-05T17:30:10.385392 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
14
|
+
W, [2012-05-05T17:30:43.757483 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
15
|
+
|
16
|
+
W, [2012-05-05T17:30:43.757621 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
17
|
+
W, [2012-05-05T17:30:45.519507 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
18
|
+
|
19
|
+
W, [2012-05-05T17:30:45.519672 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
20
|
+
W, [2012-05-05T17:30:46.357673 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
21
|
+
|
22
|
+
W, [2012-05-05T17:30:46.357847 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
23
|
+
W, [2012-05-05T17:30:47.451169 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
24
|
+
|
25
|
+
W, [2012-05-05T17:30:47.451274 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
|
2
|
+
W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
3
|
+
|
4
|
+
W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
5
|
+
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
6
|
+
|
7
|
+
W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
8
|
+
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
9
|
+
|
10
|
+
W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
11
|
+
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
12
|
+
|
13
|
+
W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
14
|
+
W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
|
15
|
+
|
16
|
+
W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
17
|
+
W, [2011-06-05T22:08:53.528629 #560] WARN -- :
|
18
|
+
|
19
|
+
W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
20
|
+
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
21
|
+
|
22
|
+
W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
23
|
+
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
24
|
+
|
25
|
+
W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
26
|
+
W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
27
|
+
|
28
|
+
W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
29
|
+
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
30
|
+
|
31
|
+
W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
32
|
+
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
33
|
+
|
34
|
+
W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
35
|
+
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
36
|
+
|
37
|
+
W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
38
|
+
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
39
|
+
|
40
|
+
W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
41
|
+
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
42
|
+
|
43
|
+
W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
44
|
+
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
45
|
+
|
46
|
+
W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
47
|
+
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
48
|
+
|
49
|
+
W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
50
|
+
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
51
|
+
|
52
|
+
W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
53
|
+
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
54
|
+
|
55
|
+
W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
56
|
+
W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
57
|
+
|
58
|
+
W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
59
|
+
W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
60
|
+
|
61
|
+
W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
62
|
+
W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
63
|
+
|
64
|
+
W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
65
|
+
W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
66
|
+
|
67
|
+
W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
68
|
+
W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
69
|
+
|
70
|
+
W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
71
|
+
W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
72
|
+
|
73
|
+
W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
74
|
+
W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
75
|
+
|
76
|
+
W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
77
|
+
W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
|
78
|
+
|
79
|
+
W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
|
data/log/system.log
CHANGED
@@ -1,66 +1,12 @@
|
|
1
|
-
# Logfile created on
|
2
|
-
I, [
|
3
|
-
I, [
|
4
|
-
I, [
|
5
|
-
I, [
|
6
|
-
I, [
|
7
|
-
I, [
|
8
|
-
I, [
|
9
|
-
I, [
|
10
|
-
I, [
|
11
|
-
I, [
|
12
|
-
I, [
|
13
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Record count is 30474
|
14
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Hit count is 30474
|
15
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Page count is 0
|
16
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Total visit count is 4484
|
17
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Human visit count is 4227
|
18
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Drop visit count is 0
|
19
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Spider visit count is 257
|
20
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Batch processing time 13.675125
|
21
|
-
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : ending ... >>> 2011-06-05 21:47:06 -0400
|
22
|
-
I, [2011-06-05T21:47:44.613806 #1112] INFO -- : starting ... >>> 2011-06-05 21:47:44 -0400
|
23
|
-
I, [2011-06-05T21:48:01.451639 #4252] INFO -- : starting ... >>> 2011-06-05 21:48:01 -0400
|
24
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Record count is 11870
|
25
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Hit count is 11870
|
26
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Page count is 0
|
27
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Total visit count is 1
|
28
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Human visit count is 1
|
29
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Drop visit count is 0
|
30
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Spider visit count is 0
|
31
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Batch processing time 3.744208
|
32
|
-
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : ending ... >>> 2011-06-05 21:48:05 -0400
|
33
|
-
I, [2011-06-05T21:48:20.829574 #4544] INFO -- : starting ... >>> 2011-06-05 21:48:20 -0400
|
34
|
-
I, [2011-06-05T21:50:44.333188 #960] INFO -- : starting ... >>> 2011-06-05 21:50:44 -0400
|
35
|
-
I, [2011-06-05T21:51:56.322305 #4712] INFO -- : starting ... >>> 2011-06-05 21:51:56 -0400
|
36
|
-
I, [2011-06-05T21:53:42.440375 #4832] INFO -- : starting ... >>> 2011-06-05 21:53:42 -0400
|
37
|
-
I, [2011-06-05T22:07:49.079943 #4560] INFO -- : starting ... >>> 2011-06-05 22:07:49 -0400
|
38
|
-
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Record count is 47
|
39
|
-
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Hit count is 42
|
40
|
-
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Page count is 0
|
41
|
-
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Total visit count is 1
|
42
|
-
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Human visit count is 1
|
43
|
-
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Drop visit count is 0
|
44
|
-
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Spider visit count is 0
|
45
|
-
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Batch processing time 0.105006
|
46
|
-
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : ending ... >>> 2011-06-05 22:07:49 -0400
|
47
|
-
I, [2011-06-05T22:08:53.515628 #560] INFO -- : starting ... >>> 2011-06-05 22:08:53 -0400
|
48
|
-
I, [2011-06-05T22:08:53.529629 #560] INFO -- : Record count is 47
|
49
|
-
I, [2011-06-05T22:08:53.529629 #560] INFO -- : Hit count is 42
|
50
|
-
I, [2011-06-05T22:08:53.529629 #560] INFO -- : Page count is 0
|
51
|
-
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Total visit count is 1
|
52
|
-
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Human visit count is 1
|
53
|
-
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Drop visit count is 0
|
54
|
-
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Spider visit count is 0
|
55
|
-
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Batch processing time 0.014
|
56
|
-
I, [2011-06-05T22:08:53.531629 #560] INFO -- : ending ... >>> 2011-06-05 22:08:53 -0400
|
57
|
-
I, [2011-06-05T22:37:50.150958 #3476] INFO -- : starting ... >>> 2011-06-05 22:37:50 -0400
|
58
|
-
I, [2011-06-05T22:37:51.664045 #3476] INFO -- : Record count is 5458
|
59
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Hit count is 5438
|
60
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Page count is 506
|
61
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Total visit count is 461
|
62
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Human visit count is 119
|
63
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Drop visit count is 78
|
64
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Spider visit count is 264
|
65
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Batch processing time 1.513087
|
66
|
-
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : ending ... >>> 2011-06-05 22:37:51 -0400
|
1
|
+
# Logfile created on Sat May 05 17:04:25 -0400 2012 by logger.rb/22285
|
2
|
+
I, [2012-05-05T17:04:25.715059 #2398] INFO -- : starting ... >>> Sat May 05 17:04:25 -0400 2012
|
3
|
+
I, [2012-05-05T17:08:58.497120 #2445] INFO -- : starting ... >>> Sat May 05 17:08:58 -0400 2012
|
4
|
+
I, [2012-05-05T17:12:10.618586 #2511] INFO -- : starting ... >>> Sat May 05 17:12:10 -0400 2012
|
5
|
+
I, [2012-05-05T17:18:56.448474 #2526] INFO -- : starting ... >>> Sat May 05 17:18:56 -0400 2012
|
6
|
+
I, [2012-05-05T17:19:48.272182 #2531] INFO -- : starting ... >>> Sat May 05 17:19:48 -0400 2012
|
7
|
+
I, [2012-05-05T17:23:04.812969 #2572] INFO -- : starting ... >>> Sat May 05 17:23:04 -0400 2012
|
8
|
+
I, [2012-05-05T17:27:53.760281 #2591] INFO -- : starting ... >>> Sat May 05 17:27:53 -0400 2012
|
9
|
+
I, [2012-05-05T17:30:00.755342 #2592] INFO -- : starting ... >>> Sat May 05 17:30:00 -0400 2012
|
10
|
+
I, [2012-05-05T17:30:10.734129 #2592] INFO -- : ending ... >>> Sat May 05 17:30:10 -0400 2012
|
11
|
+
I, [2012-05-05T17:30:37.881134 #2593] INFO -- : starting ... >>> Sat May 05 17:30:37 -0400 2012
|
12
|
+
I, [2012-05-05T17:30:47.799064 #2593] INFO -- : ending ... >>> Sat May 05 17:30:47 -0400 2012
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# Logfile created on 2011-06-05 21:46:30 -0400 by logger.rb/25413
|
2
|
+
I, [2011-06-05T21:46:30.867462 #3444] INFO -- : starting ... >>> 2011-06-05 21:46:30 -0400
|
3
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Record count is 5458
|
4
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Hit count is 5438
|
5
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Page count is 506
|
6
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Total visit count is 461
|
7
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Human visit count is 119
|
8
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Drop visit count is 78
|
9
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Spider visit count is 264
|
10
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Batch processing time 1.677403
|
11
|
+
I, [2011-06-05T21:46:32.576065 #3444] INFO -- : ending ... >>> 2011-06-05 21:46:32 -0400
|
12
|
+
I, [2011-06-05T21:46:53.028704 #5108] INFO -- : starting ... >>> 2011-06-05 21:46:53 -0400
|
13
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Record count is 30474
|
14
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Hit count is 30474
|
15
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Page count is 0
|
16
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Total visit count is 4484
|
17
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Human visit count is 4227
|
18
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Drop visit count is 0
|
19
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Spider visit count is 257
|
20
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Batch processing time 13.675125
|
21
|
+
I, [2011-06-05T21:47:06.719429 #5108] INFO -- : ending ... >>> 2011-06-05 21:47:06 -0400
|
22
|
+
I, [2011-06-05T21:47:44.613806 #1112] INFO -- : starting ... >>> 2011-06-05 21:47:44 -0400
|
23
|
+
I, [2011-06-05T21:48:01.451639 #4252] INFO -- : starting ... >>> 2011-06-05 21:48:01 -0400
|
24
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Record count is 11870
|
25
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Hit count is 11870
|
26
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Page count is 0
|
27
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Total visit count is 1
|
28
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Human visit count is 1
|
29
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Drop visit count is 0
|
30
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Spider visit count is 0
|
31
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Batch processing time 3.744208
|
32
|
+
I, [2011-06-05T21:48:05.195847 #4252] INFO -- : ending ... >>> 2011-06-05 21:48:05 -0400
|
33
|
+
I, [2011-06-05T21:48:20.829574 #4544] INFO -- : starting ... >>> 2011-06-05 21:48:20 -0400
|
34
|
+
I, [2011-06-05T21:50:44.333188 #960] INFO -- : starting ... >>> 2011-06-05 21:50:44 -0400
|
35
|
+
I, [2011-06-05T21:51:56.322305 #4712] INFO -- : starting ... >>> 2011-06-05 21:51:56 -0400
|
36
|
+
I, [2011-06-05T21:53:42.440375 #4832] INFO -- : starting ... >>> 2011-06-05 21:53:42 -0400
|
37
|
+
I, [2011-06-05T22:07:49.079943 #4560] INFO -- : starting ... >>> 2011-06-05 22:07:49 -0400
|
38
|
+
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Record count is 47
|
39
|
+
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Hit count is 42
|
40
|
+
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Page count is 0
|
41
|
+
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Total visit count is 1
|
42
|
+
I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Human visit count is 1
|
43
|
+
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Drop visit count is 0
|
44
|
+
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Spider visit count is 0
|
45
|
+
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Batch processing time 0.105006
|
46
|
+
I, [2011-06-05T22:07:49.185949 #4560] INFO -- : ending ... >>> 2011-06-05 22:07:49 -0400
|
47
|
+
I, [2011-06-05T22:08:53.515628 #560] INFO -- : starting ... >>> 2011-06-05 22:08:53 -0400
|
48
|
+
I, [2011-06-05T22:08:53.529629 #560] INFO -- : Record count is 47
|
49
|
+
I, [2011-06-05T22:08:53.529629 #560] INFO -- : Hit count is 42
|
50
|
+
I, [2011-06-05T22:08:53.529629 #560] INFO -- : Page count is 0
|
51
|
+
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Total visit count is 1
|
52
|
+
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Human visit count is 1
|
53
|
+
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Drop visit count is 0
|
54
|
+
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Spider visit count is 0
|
55
|
+
I, [2011-06-05T22:08:53.530629 #560] INFO -- : Batch processing time 0.014
|
56
|
+
I, [2011-06-05T22:08:53.531629 #560] INFO -- : ending ... >>> 2011-06-05 22:08:53 -0400
|
57
|
+
I, [2011-06-05T22:37:50.150958 #3476] INFO -- : starting ... >>> 2011-06-05 22:37:50 -0400
|
58
|
+
I, [2011-06-05T22:37:51.664045 #3476] INFO -- : Record count is 5458
|
59
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Hit count is 5438
|
60
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Page count is 506
|
61
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Total visit count is 461
|
62
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Human visit count is 119
|
63
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Drop visit count is 78
|
64
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Spider visit count is 264
|
65
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Batch processing time 1.513087
|
66
|
+
I, [2011-06-05T22:37:51.665045 #3476] INFO -- : ending ... >>> 2011-06-05 22:37:51 -0400
|
data/testit.rb
CHANGED
@@ -6,9 +6,8 @@
|
|
6
6
|
#
|
7
7
|
# Author:: Al Kivi <al.kivi@vizitrax.com>
|
8
8
|
|
9
|
-
#
|
10
|
-
|
11
|
-
# Or replace the above line to reference the gem version, if installed
|
9
|
+
require 'rubygems' # needed for ruby 1.8.7
|
10
|
+
# require '...\vizi_tracker\lib\vizi\vizi_tracker'
|
12
11
|
require 'vizi_tracker'
|
13
12
|
|
14
13
|
require 'yaml'
|
@@ -21,10 +20,10 @@ config = YAML.load_file("config/logger.yml")
|
|
21
20
|
|
22
21
|
# Initialize the log parser
|
23
22
|
parser = Vizi::LogParser.new(config["drop_ips"], config["spider_ips"],
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
config["spider_names"], config["include_urls"], config["exclude_urls"],
|
24
|
+
config["url_stem"], config["accept_only_url_stem"],config["hostname"],
|
25
|
+
config["drop_refers_by_hostname"], config["usual_agents"],
|
26
|
+
config["use_local_time"])
|
28
27
|
|
29
28
|
syslog = Logger.new('./log/system.log',shift_age = 'weekly')
|
30
29
|
case config["log_level"]
|
@@ -57,6 +56,7 @@ File.open('./data/exlog.log', 'r') do |file|
|
|
57
56
|
logformat = nil
|
58
57
|
# Begin to parse each record
|
59
58
|
while(line = file.gets)
|
59
|
+
p line
|
60
60
|
parsed_data = parser.parse_line(line, logformat)
|
61
61
|
logformat = parsed_data[:p_logformat]
|
62
62
|
rec_count = rec_count + 1
|
@@ -65,30 +65,29 @@ File.open('./data/exlog.log', 'r') do |file|
|
|
65
65
|
page_count = page_count + 1 if parsed_data[:p_pageflag]
|
66
66
|
@visit=vlist.find_by_ip(parsed_data[:ip])
|
67
67
|
if @visit.nil?
|
68
|
-
|
69
|
-
|
68
|
+
vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
|
69
|
+
parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_returnhit],parsed_data[:p_pdfstem],config["visit_timeout"]))
|
70
70
|
@visit=vlist.find_by_ip(parsed_data[:ip])
|
71
71
|
visit_count = visit_count + 1
|
72
72
|
else
|
73
|
-
@visit.update(parsed_data[:datetime],parsed_data[:
|
74
|
-
parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase], parsed_data[:p_pageid])
|
73
|
+
@visit.update(parsed_data[:datetime],parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_returnhit],parsed_data[:p_pdfstem])
|
75
74
|
end
|
76
75
|
@visits = vlist.find_expired(@visit.start_dt)
|
77
76
|
if @visits
|
78
77
|
@visits.sendoutput
|
79
78
|
vlist.delete(@visits)
|
80
|
-
human_count = human_count + 1 if @visits.
|
81
|
-
drop_count = drop_count + 1 if @visits.
|
82
|
-
spider_count = spider_count + 1 if @visits.
|
79
|
+
human_count = human_count + 1 if @visits.vtype == "H"
|
80
|
+
drop_count = drop_count + 1 if @visits.vtype == "D"
|
81
|
+
spider_count = spider_count + 1 if @visits.vtype == "S"
|
83
82
|
end
|
84
83
|
break if rec_count == max_rec_count
|
85
84
|
end
|
86
85
|
@visits = vlist.find_all
|
87
86
|
@visits.each {|v|
|
88
87
|
v.sendoutput
|
89
|
-
human_count = human_count + 1 if v.
|
90
|
-
drop_count = drop_count + 1 if v.
|
91
|
-
spider_count = spider_count + 1 if v.
|
88
|
+
human_count = human_count + 1 if v.vtype == "H"
|
89
|
+
drop_count = drop_count + 1 if v.vtype == "D"
|
90
|
+
spider_count = spider_count + 1 if v.vtype == "S"
|
92
91
|
}
|
93
92
|
if config["summary_flag"]
|
94
93
|
syslog.info "Record count is "+rec_count.to_s
|
Binary file
|
data/vizi_tracker.gemspec
CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
|
|
2
2
|
|
3
3
|
spec = Gem::Specification.new do |s|
|
4
4
|
s.name = 'vizi_tracker'
|
5
|
-
s.version = '0.
|
5
|
+
s.version = '0.4.0'
|
6
6
|
s.summary = "Visit tracking from Apache or IIS web log files"
|
7
7
|
s.description = "This module provides a set of classes to support the parsing of web log files and
|
8
8
|
the creation of visits from the individual parsed web log records.
|
@@ -17,5 +17,5 @@ spec = Gem::Specification.new do |s|
|
|
17
17
|
s.homepage = "http://www.vizitrax.com"
|
18
18
|
s.email = "al.kivi@yahoo.com"
|
19
19
|
s.has_rdoc = true
|
20
|
-
s.required_ruby_version = '>= 1.8.
|
20
|
+
s.required_ruby_version = '>= 1.8.7'
|
21
21
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vizi_tracker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 4
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.4.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Al Kivi
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-05-05 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -102,6 +102,8 @@ files:
|
|
102
102
|
- Rakefile
|
103
103
|
- log/parse.log
|
104
104
|
- log/system.log
|
105
|
+
- log/system.log.20120428
|
106
|
+
- log/parse.log.20120428
|
105
107
|
- test/parser_test.rb
|
106
108
|
- test/test_helper.rb
|
107
109
|
- config/logger.yml
|
@@ -109,6 +111,7 @@ files:
|
|
109
111
|
- config/logger_sample.yml
|
110
112
|
- config/logger_apache.yml
|
111
113
|
- testit.rb
|
114
|
+
- vizi_tracker-0.4.0.gem
|
112
115
|
- vizi_tracker.gemspec
|
113
116
|
has_rdoc: true
|
114
117
|
homepage: http://www.vizitrax.com
|
@@ -124,12 +127,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
124
127
|
requirements:
|
125
128
|
- - ">="
|
126
129
|
- !ruby/object:Gem::Version
|
127
|
-
hash:
|
130
|
+
hash: 57
|
128
131
|
segments:
|
129
132
|
- 1
|
130
133
|
- 8
|
131
|
-
-
|
132
|
-
version: 1.8.
|
134
|
+
- 7
|
135
|
+
version: 1.8.7
|
133
136
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
137
|
none: false
|
135
138
|
requirements:
|