vizi_tracker 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/config/logger.yml CHANGED
@@ -1,49 +1,39 @@
1
- # Configuration data for logger
2
- #max_rec_count: 100
1
+ # basic configuration data ...................................
2
+ max_rec_count: 99999
3
+ sub_directory: data/
3
4
  visit_timeout: 1200
4
- summary_flag: true
5
- convert_to_lower_case: true
6
- log_level: warn
7
5
  #log_level options are debug, info, warn, error, fatal
8
- homepage: /home.aspx
9
- #homepage: /
10
- accept_only_homepage: true
11
- page_urls:
12
- - aspx
13
- - asp
14
- hide_urls:
15
- - css
16
- - js
17
- - gif
18
- - swf
19
- - ico
20
- - chart
21
- - robots
6
+ log_level: info
7
+ log_record_counts: true
8
+ convert_to_lower_case: true
9
+ url_stem: /
10
+ accept_only_url_stem: false
11
+ include_urls:
12
+ - .htm
13
+ - .pdf
14
+ - home.aspx
15
+ exclude_urls:
16
+ - .html
17
+ # sitemap_match: true
22
18
  hostname: www.sigma-systems.com
23
- drop_refers_by_hostname: true
19
+ drop_refers_by_hostname: false
20
+ # output_where_rank_over: 2
24
21
  use_local_time: true
25
- download_page_number: 45
26
22
  drop_ips:
23
+ - 10.131.0.4
24
+ - 76.12.171.4
27
25
  - 76.12.185.100
28
26
  spider_ips:
29
27
  - 66.98.254.55
30
- - 64.208.168.252
31
- - 64.235.108.183
32
- - 76.2.144.115
33
- - 66.98.254.236
34
- - 202.108.22.132
35
- - 89.122.29.77
36
- - 95.174.93.222
37
- - 66.55.37.179
38
- - 198.45.18.20
39
- - 38.104.227.3
28
+ # post processing instructions ..............................
29
+ output_where_rank_over: 2
30
+ sitemap_match: true
31
+ # spider and user agents ....................................
40
32
  spider_names:
41
33
  - bot
42
34
  - spider
43
35
  - slurp
44
36
  - root.exe
45
- - .dll
46
- - slurp
47
37
  - looksmart
48
38
  - nutchsvc
49
39
  - iconsurf
@@ -55,15 +45,63 @@ spider_names:
55
45
  - konsqueror
56
46
  - crawler
57
47
  - searchme
58
- - java/1.6.0_04
48
+ - findlinks
59
49
  - scoutjet
60
50
  - yeti
61
51
  - yandex
62
- # convert urls to assigned numbers where numbers cannot be parsed from url
63
- assigned_numbers:
64
- - /visit/index,1
65
- - /visit/show/,2
66
- - /visit/vcardedit/,3
67
- - /visit/showmap_na,4
68
- - /visit/showmap_row,5
69
- match_page_numbers: false
52
+ - fetch
53
+ - nutch
54
+ - sleuth
55
+ - globalspec
56
+ - openurl
57
+ - python
58
+ - acoon
59
+ - itim
60
+ - metauri
61
+ - extractor
62
+ - exractor
63
+ - pagegetter
64
+ - linkdex
65
+ - website
66
+ - xfruits
67
+ - binlar
68
+ - abacus
69
+ - anemone
70
+ - ichiro
71
+ - scraping
72
+ - xpymep
73
+ - swish
74
+ - sitemaps
75
+ - yahoocachesystem
76
+ - sexy
77
+ - .dll
78
+ - java/1
79
+ usual_agents:
80
+ - mozilla
81
+ - opera
82
+ - chrome
83
+ - lynx
84
+ - nokia
85
+ - blackberry
86
+ - sharp
87
+ - docomo
88
+ - curl
89
+ - facebook
90
+ - microsoft
91
+ - securepoint
92
+ - wordpress
93
+ # advanced: db lookups and csv ..................................
94
+ whois_lookup: true
95
+ use_db_lookup: true
96
+ use_db_visits: true
97
+ # create_csv_file: false
98
+ # advanced: email notification ..................................
99
+ admin_email: ...
100
+ user_email: ..
101
+ email_server: ...
102
+ send_logmsg_admin: ...
103
+ # advanced: google docs connection ..............................
104
+ use_gdocs: ...
105
+ gdocs_user: ...
106
+ gdocs_password: ...
107
+ gdocs_key: ...
@@ -18,30 +18,30 @@ module Vizi
18
18
  # format string char => [:symbol to use, /regex to use when matching against log/]
19
19
  'h' => [:ip, /\d+\.\d+\.\d+\.\d+/], # apache and IIS: called c-ip in IIS
20
20
  'p' => [:sip, /\d+\.\d+\.\d+\.\d+/], # IIS:
21
- 'g' => [:auth, /\S*/], # apache:
22
- 'u' => [:username, /\S*/], # apache and IIS: called cs-username in IIS
23
- 't' => [:dtstring, /\[.*?\]/], # apache: one field with date and time
21
+ 'g' => [:auth, /\S*/], # apache:
22
+ 'u' => [:username, /\S*/], # apache and IIS: called cs-username in IIS
23
+ 't' => [:dtstring, /\[.*?\]/], # apache: one field with date and time
24
24
  'd' => [:datestring, /\d+\-\d+\-\d+/], # IIS:
25
25
  'e' => [:timestring, /\d+\:\d+\:\d+/], # IIS:
26
- 'r' => [:request, /.*?/], # apache: includes both csmethod and csuristem
27
- 'm' => [:csmethod, /\w*?/], # IIS:
28
- 'w' => [:csuristem, /\S*/], # IIS:
29
- 's' => [:status, /\d+/], # apache and IIS: is called sc_status in IIS
30
- 'b' => [:bytecount, /-|\d+/], # apache and IIS: is called cs_bytes in IIS
31
- 'v' => [:domain, /.*?/], # apache and IIS: is c-computername in IIS
32
- 'i' => [:header_lines, /.*?/], # apache: transforms to useragent or referer or cookies
33
- 'a' => [:useragent, /\S*/], # IIS:
34
- 'j' => [:referer, /\S*/], # IIS:
35
- 'k' => [:cscookie, /\d+/], # IIS:
36
- 'q' => [:csuriquery, /.*/], # IIS:
37
- 'y' => [:csbytes, /d+/], # IIS:
38
- 'o' => [:sport, /\d+/], # IIS:
39
- 'x' => [:scsubstatus, /\d+/], # IIS:
40
- 'z' => [:cshost, /\d+/], # IIS:
41
- 'l' => [:win32status, /\d+/], # IIS:
42
- 'n' => [:timetaken, /\d+/], # IIS:
43
- 'c' => [:comment, /^#/], # IIS: comment line identifier
44
- 'f' => [:fields, /^#Fields:/] # IIS: field line identifier
26
+ 'r' => [:request, /.*?/], # apache: includes both csmethod and csuristem
27
+ 'm' => [:csmethod, /\w*?/], # IIS:
28
+ 'w' => [:csuristem, /\S*/], # IIS:
29
+ 's' => [:status, /\d+/], # apache and IIS: is called sc_status in IIS
30
+ 'b' => [:bytecount, /-|\d+/], # apache and IIS: is called cs_bytes in IIS
31
+ 'v' => [:domain, /.*?/], # apache and IIS: is c-computername in IIS
32
+ 'i' => [:header_lines, /.*?/], # apache: transforms to useragent or referer or cookies
33
+ 'a' => [:useragent, /\S*/], # IIS:
34
+ 'j' => [:referer, /\S*/], # IIS:
35
+ 'k' => [:cscookie, /\d+/], # IIS:
36
+ 'q' => [:csuriquery, /.*/], # IIS:
37
+ 'y' => [:csbytes, /d+/], # IIS:
38
+ 'o' => [:sport, /\d+/], # IIS:
39
+ 'x' => [:scsubstatus, /\d+/], # IIS:
40
+ 'z' => [:cshost, /\d+/], # IIS:
41
+ 'l' => [:win32status, /\d+/], # IIS:
42
+ 'n' => [:timetaken, /\d+/], # IIS:
43
+ 'c' => [:comment, /^#/], # IIS: comment line identifier
44
+ 'f' => [:fields, /^#Fields:/] # IIS: field line identifier
45
45
  }
46
46
 
47
47
  # This method initializes the LogFormat object with fieldnames and log formats
@@ -106,7 +106,7 @@ module Vizi
106
106
  'cs-uri-query' => 'q',
107
107
  'cs(Referer)' => 'j', # internal shortened to referer
108
108
  'cs(User-Agent)' => 'a', # internal shortened to useragent
109
- 'cs(Cookie)' => 'k', # internal shortened to cscookie
109
+ 'cs(Cookie)' => 'k', # internal shortened to cscookie
110
110
  's-port' => 'o',
111
111
  'cs-host' => 'z',
112
112
  'sc-substatus' => 'x',
@@ -120,20 +120,19 @@ module Vizi
120
120
  #@@log = ActiveRecord::Base.logger
121
121
 
122
122
  # This method initialises LogParser object and loads the configurable logger control items
123
- def initialize(drop_ips, spider_ips, spider_names, page_urls, hide_urls, homepage, accept_only_homepage,
124
- hostname, drop_refers_by_hostname, use_local_time, assigned_numbers, match_page_numbers)
123
+ def initialize(drop_ips, spider_ips, spider_names, include_urls, exclude_urls, url_stem, accept_only_url_stem,
124
+ hostname, drop_refers_by_hostname, usualagents, use_local_time)
125
125
  @drops = drop_ips
126
126
  @sips = spider_ips
127
127
  @snames = spider_names
128
- @page_urls = page_urls
129
- @hide_urls = hide_urls
130
- @homepage = homepage
131
- @accept_only_homepage = accept_only_homepage
128
+ @include_urls = include_urls
129
+ @exclude_urls = exclude_urls
130
+ @url_stem = url_stem
131
+ @accept_only_url_stem = accept_only_url_stem
132
132
  @hostname = hostname
133
133
  @drop_refers_by_hostname = drop_refers_by_hostname
134
+ @usualagents = usualagents
134
135
  @use_local_time = use_local_time
135
- @assigned_numbers = assigned_numbers
136
- @match_page_numbers = match_page_numbers
137
136
  @log_format = []
138
137
  initialize_known_formats
139
138
  @parselog = Logger.new('./log/parse.log', shift_age = 'weekly')
@@ -174,7 +173,7 @@ module Vizi
174
173
  hit = nil
175
174
  i = 0
176
175
  while i < fldarray.length
177
- hit = field.index(fldarray[i])
176
+ hit = field.index(fldarray[i])
178
177
  break if hit
179
178
  i = i + 1
180
179
  end
@@ -199,6 +198,9 @@ module Vizi
199
198
 
200
199
  # apache files ... regex the file to determine logformat name
201
200
  # IIS files ... parse the fields string to determine the file contents
201
+ # :p_linetype ... line is a (C)ontrol line, (F)ield line or a good (V)isitor line
202
+ # :p_pageflag ... (Y)es if the line is a valid page, (N)o if not
203
+ # :p_visitortype ... (H)uman, (S)pider, (D)ropped or (-) Not relevant
202
204
  def parse_line(line, logformat)
203
205
  if logformat != nil
204
206
  log_format = logformat # get log_format string
@@ -219,7 +221,7 @@ module Vizi
219
221
  parsed_data[:p_logformat] = logformat
220
222
  parsed_data[:p_visitortype] = "H" # set default visitor type (H)uman
221
223
  parsed_data[:p_linetype] = "V" # linetype is (V)isitors
222
- parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # reset if a comment line
224
+ parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # reset if a comment line
223
225
  if @format_name.to_s == "w3c_f" # IIS file name ... generic
224
226
  @format = build_format(line) # parse fields to get log_format
225
227
  temp_format = Vizi::LogFormat.new(:temp, @format) # create temp format
@@ -252,28 +254,39 @@ module Vizi
252
254
 
253
255
  if parsed_data[:request]
254
256
  # splitrequest = parsed_data[:request].gsub("/", " ").split
255
- splitrequest = parsed_data[:request].split(' ')
257
+ splitrequest = parsed_data[:request].split(' ')
256
258
  parsed_data[:csuristem] = splitrequest[1]
257
259
  end
258
260
 
259
- # Now classify visitortype based on logger yml rules ...
260
-
261
- parsed_data[:p_pageflag] = false
262
- if @accept_only_homepage
263
- #p @homepage
264
- #p parsed_data[:csuristem]
265
- parsed_data[:p_pageflag] = true if parsed_data[:csuristem].downcase.index(@homepage) == 0
261
+ # Now determine visitortype based on logger yml rules ...
262
+ parsed_data[:p_pageflag] = "N"
263
+ if @accept_only_url_stem # indicates that url_stem must always appear at start of csuristem
264
+ parsed_data[:p_pageflag] = "Y" if parsed_data[:csuristem].downcase.index(@url_stem) == 0
266
265
  else
267
- parsed_data[:p_pageflag] = true if match_partial(parsed_data[:csuristem], @page_urls)
268
- end
269
- parsed_data[:p_pageflag] = false if @hide_urls and match_partial(parsed_data[:csuristem], @hide_urls)
270
-
266
+ if parsed_data[:csuristem].downcase == @url_stem
267
+ parsed_data[:p_pageflag] = "Y"
268
+ else
269
+ if @include_urls
270
+ parsed_data[:p_pageflag] = "Y" if match_partial(parsed_data[:csuristem].downcase, @include_urls)
271
+ end
272
+ if @exclude_urls
273
+ parsed_data[:p_pageflag] = "N" if match_partial(parsed_data[:csuristem].downcase, @exclude_urls)
274
+ end
275
+ end
276
+ end
277
+
278
+ parsed_data[:p_visitortype] = "D" if parsed_data[:status] == "404"
271
279
  parsed_data[:p_visitortype] = "D" if @drops and @drops.index(parsed_data[:ip])
272
- parsed_data[:p_visitortype] = "S" if @sips and@sips.index(parsed_data[:ip])
273
-
274
- if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent], @snames)
280
+ parsed_data[:p_visitortype] = "S" if @sips and @sips.index(parsed_data[:ip])
281
+ if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent].downcase, @snames)
275
282
  parsed_data[:p_visitortype] = "S"
276
283
  end
284
+ parsed_data[:p_visitortype] = "S" if parsed_data[:useragent] == "-"
285
+ parsed_data[:p_usualagent] = "Y"
286
+ parsed_data[:p_usualagent] = "N" if parsed_data[:p_visitortype] != "S" and not match_partial(parsed_data[:useragent].downcase, @usualagents)
287
+
288
+ parsed_data[:p_returnhit] = "N"
289
+ parsed_data[:p_returnhit] = "Y" if parsed_data[:status] == "304"
277
290
 
278
291
  if parsed_data[:referer]
279
292
  y = (/(search\?\S*?[pq])=(\S*?)(&)/).match(parsed_data[:referer])
@@ -283,11 +296,8 @@ module Vizi
283
296
  end
284
297
  end
285
298
 
286
- if @match_page_numbers and parsed_data[:p_pageflag]
287
- parsed_data[:p_pageid] = find_assigned_number(parsed_data[:csuristem], @assigned_numbers)
288
- # p ">>" + parsed_data[:p_pageid].to_s if parsed_data[:p_pageid]
289
- end
290
-
299
+ parsed_data[:p_pdfstem] = nil
300
+ parsed_data[:p_pdfstem] = parsed_data[:csuristem].downcase if parsed_data[:csuristem].downcase.index("/pdfs/") == 0
291
301
  end
292
302
  parsed_data
293
303
  end
@@ -297,80 +307,203 @@ module Vizi
297
307
  # Visits are determined on the basis of the IP Address hits during a timed interval
298
308
  #
299
309
  class Visit
300
- attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pages, :robots, :visitortype, :searchphrase
301
-
302
- # This method calculates the rank
303
- def calculate_rank(pages, duration, visitortype)
304
- ranktotal = [pages,9].min*10 + [duration/60,9].min
305
- rank = ((ranktotal+10)/20).round
306
- rank = 1 if rank == 0
307
- rank = -rank if visitortype == "S"
308
- rank = 0 if visitortype == "D"
309
- return rank
310
- end
311
-
312
- # This method extracts the name of a downloaded file from the csuriquery value
313
- def get_download(csuriquery, timetaken)
314
- download = nil
315
- if timetaken.to_i > 4000
316
- split_uri = csuriquery.split("file=")
317
- download = split_uri[1]
318
- p download
319
- end
320
- return download
321
- end
310
+ attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pgcount, :robots, :vtype,
311
+ :returnhit, :searchphrase, :orgname, :city, :country, :region, :grouphash, :group, :groupcount, :pdfstem, :pdflist
322
312
 
323
- # The method completes the initialization and update methods
324
- def add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
325
- @searchphrase = p_searchphrase if p_searchphrase
326
- @rank = calculate_rank(@pages, @duration, @visitortype)
327
- @pageids = []
328
- if p_pageid
329
- @pageids << p_pageid
330
- else
331
- z=(/(PageID)=(\d+)/).match(csuriquery)
332
- if z
333
- p_pageid = z[2].to_i
334
- @pageids << p_pageid
335
- @download_file = get_download(csuriquery, timetaken) if p_pageid == @@download_page_number
336
- end
337
- end
338
- end
339
-
340
- # This method initializes the Visit object. Load object with parsed data
341
- def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
313
+ # This method initializes the Visit object. Loads object with parsed data from first captured line
314
+ def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_returnhit, p_pdfstem, visit_timeout)
342
315
  @ip = ip
343
316
  @start_dt = log_dt
344
- @expire_dt = @start_dt + @@visit_timeout
317
+ @expire_dt = @start_dt + visit_timeout
345
318
  @end_dt = @start_dt
346
319
  @duration = 0
347
320
  @hits = 0
348
- @pages = 0
349
- @pages = 1 if p_pageflag
350
- @visitortype = p_visitortype
351
- @visitortype = "S" if csuristem == "/robots.txt"
321
+ @pgcount = 0
322
+ @pgcount = 1 if p_pageflag == "Y"
323
+ @vtype = p_visitortype
324
+ @vtype = "S" if csuristem == "/robots.txt"
325
+ @returnhit = p_returnhit
326
+ @orgname = ""
327
+ @city = ""
328
+ @country = ""
329
+ @region = ""
330
+ @grouphash = Hash.new
331
+ @group = ""
332
+ @groupcount = 0
333
+ @orgmatch = ""
352
334
  @searchphrase = ""
353
- add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
335
+ @pdfstem = p_pdfstem
336
+ @pdflist = Array.new
337
+ @pdflist << @pdfstem if not @pdfstem.nil?
338
+ @rank = calculate_rank(@pgcount, @duration, @vtype, @pdflist.length)
339
+ end
340
+
341
+ # This method calculates the rank
342
+ def calculate_rank(pgcount, duration, visitortype, pdfhits)
343
+ if pgcount < 4
344
+ rank = pgcount
345
+ elsif pgcount > 10
346
+ rank = 5
347
+ else
348
+ rank = 4
349
+ end
350
+ rank = 2 if duration < 21
351
+ rank = 1 if duration < 11
352
+ rank = 0 if duration < 11 and pgcount > 40
353
+ rank = 0 if pgcount > duration/5
354
+ rank = 0 if duration == 0
355
+ rank = 0 if visitortype == "D"
356
+ rank = rank + 1 if pdfhits > 0
357
+ rank = 5 if rank > 5
358
+ rank = -rank if visitortype == "S"
359
+ return rank
354
360
  end
355
361
 
356
362
  # This method updates the Visit object with new parsed data
357
- def update(end_dt, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
363
+ def update(end_dt, p_visitortype, p_pageflag, p_returnhit, p_pdfstem)
358
364
  @end_dt = end_dt
359
365
  @duration = (@end_dt - @start_dt).to_i
360
366
  @hits = @hits + 1
361
- @pages = @pages + 1 if p_pageflag
362
- @visitortype = p_visitortype if @visitortype == "H"
363
- add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
367
+ @pgcount = @pgcount + 1 if p_pageflag == "Y"
368
+ @vtype = p_visitortype if @vtype == "H"
369
+ @returnhit = p_returnhit if @returnhit == "N"
370
+ @pdfstem = p_pdfstem
371
+ @pdflist << @pdfstem if @pdfstem and @pdflist.index(@pdfstem).nil?
372
+ @rank = calculate_rank(@pgcount, @duration, @vtype, @pdflist.length)
373
+ end
374
+
375
+ # This method updates the Visit object with results of the whois lookup
376
+ def add_details(orgname, city, country, region)
377
+ @orgname = orgname
378
+ @city = city
379
+ @country = country
380
+ @region = region
381
+ end
382
+
383
+ def getip
384
+ @ip
385
+ end
386
+
387
+ # Get rank from object
388
+ def getrank
389
+ @rank
390
+ end
391
+
392
+ # Add count to group
393
+ def increment_group(group)
394
+ @grouphash[group] = @grouphash[group].to_i + 1
395
+ end
396
+
397
+ # Classify the visit based on various factors
398
+ def classify_visit
399
+ @group = "none"
400
+ @groupcount = 0
401
+ if @grouphash.length > 0
402
+ z = @grouphash.invert.sort
403
+ zlast = z[z.length-1]
404
+ @group = zlast[1]
405
+ @groupcount = z.length
406
+ end
407
+ case @group
408
+ when "news", "company", "resources"
409
+ @persona = "Analyst"
410
+ when "home", "contacts"
411
+ @persona = "Tirekicker"
412
+ when "products", "solutions"
413
+ @persona = "Suspect"
414
+ when "careers"
415
+ @persona = "Jobhunter"
416
+ when "evolve"
417
+ @persona = "Prospect"
418
+ when "partners"
419
+ @persona = "Barney"
420
+ when "customers"
421
+ @persona = "Poacher"
422
+ else
423
+ @persona = "None"
424
+ end
425
+ @persona = "Bouncer" if @rank < 3
426
+ @persona = "Prospect" if @persona == "Suspect" and ((@rank == 4 and @returnhit == "Y") or @rank == 5)
364
427
  end
365
428
 
429
+ # This method looks to match the orgname against the orgs file
430
+ def matchorg(orgs)
431
+ @orgmatch = ""
432
+ orgs.each {|group, names|
433
+ names.each { |n|
434
+ if @orgname.index(n)
435
+ @orgmatch = group
436
+ break
437
+ end
438
+ }
439
+ }
440
+ end
441
+
442
+ # Print short output with key fields from the object
366
443
  def sendoutput
367
- #if @rank > 0
368
- iplong = @ip.to_s+" "
369
- p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@visitortype+" Hits> "+@hits.to_s+" Pgs> "+@pages.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
370
- p" Phrase> "+@searchphrase if @searchphrase.length > 0
371
- p @pageids if @pageids.length > 0
372
- #end
444
+ iplong = @ip+" "
445
+ p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@vtype+" Pgs> "+@pgcount.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
446
+ end
447
+
448
+ # Print long output with key fields from the object
449
+ def printoutput
450
+ iplong = @ip+" "
451
+ p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@vtype+" Pgs> "+@pgcount.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s+" Org> "+@orgname+" City> "+@city+" Country> "+@country+" Region> "+@region
452
+ end
453
+
454
+ #def createcsvheader(fileout)
455
+ #fileout.puts("ipaddress, date, time, vtype, pgcount, duration, rank, returnhit, orgname, city, country, region")
456
+ #end
457
+
458
+ #def createcsvoutput(fileout)
459
+ #iplong = @ip+" "
460
+ #fileout.puts(iplong[0..14]+","+@start_dt.to_s[0..10]+","+@start_dt.to_s[11..18]+","+@vtype+","+@pgcount.to_s+","+@duration.to_s+","+@rank.to_s+","+@returnhit+","+@orgname+","+@city+","+@country+","+@region)
461
+ #end
462
+
463
+ # Store output to Google docs spreadsheet
464
+ def gdocsoutput (ws, row_count)
465
+ r = row_count+2
466
+ ws[r,1] = @ip
467
+ ws[r,2] = @start_dt.strftime("%m/%d/%Y")
468
+ ws[r,3] = @start_dt.strftime("%I:%M%p")
469
+ ws[r,4] = @pgcount
470
+ ws[r,5] = @duration
471
+ ws[r,6] = (@pdflist.length)
472
+ ws[r,7] = @rank
473
+ ws[r,8] = @orgname
474
+ ws[r,9] = @city
475
+ ws[r,10] = @country
476
+ ws[r,11] = @region
477
+ ws[r,12] = @returnhit
478
+ ws[r,13] = @persona
479
+ ws[r,14] = @group
480
+ ws[r,15] = @groupcount
481
+ ws[r,16] = @orgmatch
482
+ ws.save()
373
483
  end
484
+
485
+ # Save output to database file
486
+ def saveoutput
487
+ @vzvisit = Vzvisit.new
488
+ @vzvisit[:ipaddr] = @ip
489
+ @vzvisit[:vdatetime] = @start_dt
490
+ @vzvisit[:vtype] = @vtype
491
+ @vzvisit[:pgcount] = @pgcount
492
+ @vzvisit[:duration] = @duration
493
+ @vzvisit[:rank] = @rank
494
+ @vzvisit[:orgname] = @orgname
495
+ @vzvisit[:city] = @city
496
+ @vzvisit[:country] = @country
497
+ @vzvisit[:region] = @region
498
+ @vzvisit[:returnhit] = @returnhit
499
+ @vzvisit[:group] = @group
500
+ @vzvisit[:groupcount] = @groupcount
501
+ @vzvisit[:persona] = @persona
502
+ @vzvisit[:orgmatch] = @orgmatch
503
+ @vzvisit[:pdfhits] = @pdflist.length
504
+ @vzvisit.save
505
+ end
506
+
374
507
  end
375
508
 
376
509
  # This class creates and manages a list to keep track of the visits that are in process (cached)
data/log/parse.log CHANGED
@@ -1,79 +1,25 @@
1
- # Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
2
- W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
3
-
4
- W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
- W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
6
-
7
- W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
- W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
9
-
10
- W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
- W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
12
-
13
- W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
- W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
15
-
16
- W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
- W, [2011-06-05T22:08:53.528629 #560] WARN -- :
18
-
19
- W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
- W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
-
22
- W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
- W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
-
25
- W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
26
- W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
27
-
28
- W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
29
- W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
30
-
31
- W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
32
- W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
33
-
34
- W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
35
- W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
36
-
37
- W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
38
- W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
39
-
40
- W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
41
- W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
42
-
43
- W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
44
- W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
45
-
46
- W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
47
- W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
48
-
49
- W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
50
- W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
51
-
52
- W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
53
- W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
54
-
55
- W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
56
- W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
57
-
58
- W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
59
- W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
60
-
61
- W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
62
- W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
63
-
64
- W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
65
- W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
66
-
67
- W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
68
- W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
1
+ # Logfile created on Sat May 05 17:30:06 -0400 2012 by logger.rb/22285
2
+ W, [2012-05-05T17:30:06.758356 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
69
3
 
70
- W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
71
- W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
4
+ W, [2012-05-05T17:30:06.758765 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
+ W, [2012-05-05T17:30:08.532729 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
72
6
 
73
- W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
74
- W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
7
+ W, [2012-05-05T17:30:08.532918 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
+ W, [2012-05-05T17:30:09.348402 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
75
9
 
76
- W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
77
- W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
10
+ W, [2012-05-05T17:30:09.348559 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
+ W, [2012-05-05T17:30:10.385291 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
78
12
 
79
- W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
13
+ W, [2012-05-05T17:30:10.385392 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
+ W, [2012-05-05T17:30:43.757483 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
15
+
16
+ W, [2012-05-05T17:30:43.757621 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
+ W, [2012-05-05T17:30:45.519507 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
18
+
19
+ W, [2012-05-05T17:30:45.519672 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
+ W, [2012-05-05T17:30:46.357673 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
+
22
+ W, [2012-05-05T17:30:46.357847 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
+ W, [2012-05-05T17:30:47.451169 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
+
25
+ W, [2012-05-05T17:30:47.451274 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
@@ -0,0 +1,79 @@
1
+ # Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
2
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
3
+
4
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
6
+
7
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
9
+
10
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
12
+
13
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
+ W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
15
+
16
+ W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- :
18
+
19
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
+
22
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
+
25
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
26
+ W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
27
+
28
+ W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
29
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
30
+
31
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
32
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
33
+
34
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
35
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
36
+
37
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
38
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
39
+
40
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
41
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
42
+
43
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
44
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
45
+
46
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
47
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
48
+
49
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
50
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
51
+
52
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
53
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
54
+
55
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
56
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
57
+
58
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
59
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
60
+
61
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
62
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
63
+
64
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
65
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
66
+
67
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
68
+ W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
69
+
70
+ W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
71
+ W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
72
+
73
+ W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
74
+ W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
75
+
76
+ W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
77
+ W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
78
+
79
+ W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
data/log/system.log CHANGED
@@ -1,66 +1,12 @@
1
- # Logfile created on 2011-06-05 21:46:30 -0400 by logger.rb/25413
2
- I, [2011-06-05T21:46:30.867462 #3444] INFO -- : starting ... >>> 2011-06-05 21:46:30 -0400
3
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Record count is 5458
4
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Hit count is 5438
5
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Page count is 506
6
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Total visit count is 461
7
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Human visit count is 119
8
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Drop visit count is 78
9
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Spider visit count is 264
10
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Batch processing time 1.677403
11
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : ending ... >>> 2011-06-05 21:46:32 -0400
12
- I, [2011-06-05T21:46:53.028704 #5108] INFO -- : starting ... >>> 2011-06-05 21:46:53 -0400
13
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Record count is 30474
14
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Hit count is 30474
15
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Page count is 0
16
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Total visit count is 4484
17
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Human visit count is 4227
18
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Drop visit count is 0
19
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Spider visit count is 257
20
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Batch processing time 13.675125
21
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : ending ... >>> 2011-06-05 21:47:06 -0400
22
- I, [2011-06-05T21:47:44.613806 #1112] INFO -- : starting ... >>> 2011-06-05 21:47:44 -0400
23
- I, [2011-06-05T21:48:01.451639 #4252] INFO -- : starting ... >>> 2011-06-05 21:48:01 -0400
24
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Record count is 11870
25
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Hit count is 11870
26
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Page count is 0
27
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Total visit count is 1
28
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Human visit count is 1
29
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Drop visit count is 0
30
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Spider visit count is 0
31
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Batch processing time 3.744208
32
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : ending ... >>> 2011-06-05 21:48:05 -0400
33
- I, [2011-06-05T21:48:20.829574 #4544] INFO -- : starting ... >>> 2011-06-05 21:48:20 -0400
34
- I, [2011-06-05T21:50:44.333188 #960] INFO -- : starting ... >>> 2011-06-05 21:50:44 -0400
35
- I, [2011-06-05T21:51:56.322305 #4712] INFO -- : starting ... >>> 2011-06-05 21:51:56 -0400
36
- I, [2011-06-05T21:53:42.440375 #4832] INFO -- : starting ... >>> 2011-06-05 21:53:42 -0400
37
- I, [2011-06-05T22:07:49.079943 #4560] INFO -- : starting ... >>> 2011-06-05 22:07:49 -0400
38
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Record count is 47
39
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Hit count is 42
40
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Page count is 0
41
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Total visit count is 1
42
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Human visit count is 1
43
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Drop visit count is 0
44
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Spider visit count is 0
45
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Batch processing time 0.105006
46
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : ending ... >>> 2011-06-05 22:07:49 -0400
47
- I, [2011-06-05T22:08:53.515628 #560] INFO -- : starting ... >>> 2011-06-05 22:08:53 -0400
48
- I, [2011-06-05T22:08:53.529629 #560] INFO -- : Record count is 47
49
- I, [2011-06-05T22:08:53.529629 #560] INFO -- : Hit count is 42
50
- I, [2011-06-05T22:08:53.529629 #560] INFO -- : Page count is 0
51
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Total visit count is 1
52
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Human visit count is 1
53
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Drop visit count is 0
54
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Spider visit count is 0
55
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Batch processing time 0.014
56
- I, [2011-06-05T22:08:53.531629 #560] INFO -- : ending ... >>> 2011-06-05 22:08:53 -0400
57
- I, [2011-06-05T22:37:50.150958 #3476] INFO -- : starting ... >>> 2011-06-05 22:37:50 -0400
58
- I, [2011-06-05T22:37:51.664045 #3476] INFO -- : Record count is 5458
59
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Hit count is 5438
60
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Page count is 506
61
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Total visit count is 461
62
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Human visit count is 119
63
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Drop visit count is 78
64
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Spider visit count is 264
65
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Batch processing time 1.513087
66
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : ending ... >>> 2011-06-05 22:37:51 -0400
1
+ # Logfile created on Sat May 05 17:04:25 -0400 2012 by logger.rb/22285
2
+ I, [2012-05-05T17:04:25.715059 #2398] INFO -- : starting ... >>> Sat May 05 17:04:25 -0400 2012
3
+ I, [2012-05-05T17:08:58.497120 #2445] INFO -- : starting ... >>> Sat May 05 17:08:58 -0400 2012
4
+ I, [2012-05-05T17:12:10.618586 #2511] INFO -- : starting ... >>> Sat May 05 17:12:10 -0400 2012
5
+ I, [2012-05-05T17:18:56.448474 #2526] INFO -- : starting ... >>> Sat May 05 17:18:56 -0400 2012
6
+ I, [2012-05-05T17:19:48.272182 #2531] INFO -- : starting ... >>> Sat May 05 17:19:48 -0400 2012
7
+ I, [2012-05-05T17:23:04.812969 #2572] INFO -- : starting ... >>> Sat May 05 17:23:04 -0400 2012
8
+ I, [2012-05-05T17:27:53.760281 #2591] INFO -- : starting ... >>> Sat May 05 17:27:53 -0400 2012
9
+ I, [2012-05-05T17:30:00.755342 #2592] INFO -- : starting ... >>> Sat May 05 17:30:00 -0400 2012
10
+ I, [2012-05-05T17:30:10.734129 #2592] INFO -- : ending ... >>> Sat May 05 17:30:10 -0400 2012
11
+ I, [2012-05-05T17:30:37.881134 #2593] INFO -- : starting ... >>> Sat May 05 17:30:37 -0400 2012
12
+ I, [2012-05-05T17:30:47.799064 #2593] INFO -- : ending ... >>> Sat May 05 17:30:47 -0400 2012
@@ -0,0 +1,66 @@
1
+ # Logfile created on 2011-06-05 21:46:30 -0400 by logger.rb/25413
2
+ I, [2011-06-05T21:46:30.867462 #3444] INFO -- : starting ... >>> 2011-06-05 21:46:30 -0400
3
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Record count is 5458
4
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Hit count is 5438
5
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Page count is 506
6
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Total visit count is 461
7
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Human visit count is 119
8
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Drop visit count is 78
9
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Spider visit count is 264
10
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Batch processing time 1.677403
11
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : ending ... >>> 2011-06-05 21:46:32 -0400
12
+ I, [2011-06-05T21:46:53.028704 #5108] INFO -- : starting ... >>> 2011-06-05 21:46:53 -0400
13
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Record count is 30474
14
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Hit count is 30474
15
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Page count is 0
16
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Total visit count is 4484
17
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Human visit count is 4227
18
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Drop visit count is 0
19
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Spider visit count is 257
20
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Batch processing time 13.675125
21
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : ending ... >>> 2011-06-05 21:47:06 -0400
22
+ I, [2011-06-05T21:47:44.613806 #1112] INFO -- : starting ... >>> 2011-06-05 21:47:44 -0400
23
+ I, [2011-06-05T21:48:01.451639 #4252] INFO -- : starting ... >>> 2011-06-05 21:48:01 -0400
24
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Record count is 11870
25
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Hit count is 11870
26
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Page count is 0
27
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Total visit count is 1
28
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Human visit count is 1
29
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Drop visit count is 0
30
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Spider visit count is 0
31
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Batch processing time 3.744208
32
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : ending ... >>> 2011-06-05 21:48:05 -0400
33
+ I, [2011-06-05T21:48:20.829574 #4544] INFO -- : starting ... >>> 2011-06-05 21:48:20 -0400
34
+ I, [2011-06-05T21:50:44.333188 #960] INFO -- : starting ... >>> 2011-06-05 21:50:44 -0400
35
+ I, [2011-06-05T21:51:56.322305 #4712] INFO -- : starting ... >>> 2011-06-05 21:51:56 -0400
36
+ I, [2011-06-05T21:53:42.440375 #4832] INFO -- : starting ... >>> 2011-06-05 21:53:42 -0400
37
+ I, [2011-06-05T22:07:49.079943 #4560] INFO -- : starting ... >>> 2011-06-05 22:07:49 -0400
38
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Record count is 47
39
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Hit count is 42
40
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Page count is 0
41
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Total visit count is 1
42
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Human visit count is 1
43
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Drop visit count is 0
44
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Spider visit count is 0
45
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Batch processing time 0.105006
46
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : ending ... >>> 2011-06-05 22:07:49 -0400
47
+ I, [2011-06-05T22:08:53.515628 #560] INFO -- : starting ... >>> 2011-06-05 22:08:53 -0400
48
+ I, [2011-06-05T22:08:53.529629 #560] INFO -- : Record count is 47
49
+ I, [2011-06-05T22:08:53.529629 #560] INFO -- : Hit count is 42
50
+ I, [2011-06-05T22:08:53.529629 #560] INFO -- : Page count is 0
51
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Total visit count is 1
52
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Human visit count is 1
53
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Drop visit count is 0
54
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Spider visit count is 0
55
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Batch processing time 0.014
56
+ I, [2011-06-05T22:08:53.531629 #560] INFO -- : ending ... >>> 2011-06-05 22:08:53 -0400
57
+ I, [2011-06-05T22:37:50.150958 #3476] INFO -- : starting ... >>> 2011-06-05 22:37:50 -0400
58
+ I, [2011-06-05T22:37:51.664045 #3476] INFO -- : Record count is 5458
59
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Hit count is 5438
60
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Page count is 506
61
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Total visit count is 461
62
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Human visit count is 119
63
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Drop visit count is 78
64
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Spider visit count is 264
65
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Batch processing time 1.513087
66
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : ending ... >>> 2011-06-05 22:37:51 -0400
data/testit.rb CHANGED
@@ -6,9 +6,8 @@
6
6
  #
7
7
  # Author:: Al Kivi <al.kivi@vizitrax.com>
8
8
 
9
- # Use either the library version of the code
10
- ## require './lib/vizi_tracker'
11
- # Or replace the above line to reference the gem version, if installed
9
+ require 'rubygems' # needed for ruby 1.8.7
10
+ # require '...\vizi_tracker\lib\vizi\vizi_tracker'
12
11
  require 'vizi_tracker'
13
12
 
14
13
  require 'yaml'
@@ -21,10 +20,10 @@ config = YAML.load_file("config/logger.yml")
21
20
 
22
21
  # Initialize the log parser
23
22
  parser = Vizi::LogParser.new(config["drop_ips"], config["spider_ips"],
24
- config["spider_names"], config["page_urls"], config["hide_urls"],
25
- config["homepage"], config["accept_only_homepage"],config["hostname"],
26
- config["drop_refers_by_hostname"], config["use_local_time"],
27
- config["assigned_numbers"], config["match_page_numbers"])
23
+ config["spider_names"], config["include_urls"], config["exclude_urls"],
24
+ config["url_stem"], config["accept_only_url_stem"],config["hostname"],
25
+ config["drop_refers_by_hostname"], config["usual_agents"],
26
+ config["use_local_time"])
28
27
 
29
28
  syslog = Logger.new('./log/system.log',shift_age = 'weekly')
30
29
  case config["log_level"]
@@ -57,6 +56,7 @@ File.open('./data/exlog.log', 'r') do |file|
57
56
  logformat = nil
58
57
  # Begin to parse each record
59
58
  while(line = file.gets)
59
+ p line
60
60
  parsed_data = parser.parse_line(line, logformat)
61
61
  logformat = parsed_data[:p_logformat]
62
62
  rec_count = rec_count + 1
@@ -65,30 +65,29 @@ File.open('./data/exlog.log', 'r') do |file|
65
65
  page_count = page_count + 1 if parsed_data[:p_pageflag]
66
66
  @visit=vlist.find_by_ip(parsed_data[:ip])
67
67
  if @visit.nil?
68
- vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
69
- parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase],parsed_data[:p_pageid]))
68
+ vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
69
+ parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_returnhit],parsed_data[:p_pdfstem],config["visit_timeout"]))
70
70
  @visit=vlist.find_by_ip(parsed_data[:ip])
71
71
  visit_count = visit_count + 1
72
72
  else
73
- @visit.update(parsed_data[:datetime],parsed_data[:csuriquery],parsed_data[:timetaken],
74
- parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase], parsed_data[:p_pageid])
73
+ @visit.update(parsed_data[:datetime],parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_returnhit],parsed_data[:p_pdfstem])
75
74
  end
76
75
  @visits = vlist.find_expired(@visit.start_dt)
77
76
  if @visits
78
77
  @visits.sendoutput
79
78
  vlist.delete(@visits)
80
- human_count = human_count + 1 if @visits.visitortype == "H"
81
- drop_count = drop_count + 1 if @visits.visitortype == "D"
82
- spider_count = spider_count + 1 if @visits.visitortype == "S"
79
+ human_count = human_count + 1 if @visits.vtype == "H"
80
+ drop_count = drop_count + 1 if @visits.vtype == "D"
81
+ spider_count = spider_count + 1 if @visits.vtype == "S"
83
82
  end
84
83
  break if rec_count == max_rec_count
85
84
  end
86
85
  @visits = vlist.find_all
87
86
  @visits.each {|v|
88
87
  v.sendoutput
89
- human_count = human_count + 1 if v.visitortype == "H"
90
- drop_count = drop_count + 1 if v.visitortype == "D"
91
- spider_count = spider_count + 1 if v.visitortype == "S"
88
+ human_count = human_count + 1 if v.vtype == "H"
89
+ drop_count = drop_count + 1 if v.vtype == "D"
90
+ spider_count = spider_count + 1 if v.vtype == "S"
92
91
  }
93
92
  if config["summary_flag"]
94
93
  syslog.info "Record count is "+rec_count.to_s
Binary file
data/vizi_tracker.gemspec CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
2
2
 
3
3
  spec = Gem::Specification.new do |s|
4
4
  s.name = 'vizi_tracker'
5
- s.version = '0.3.0'
5
+ s.version = '0.4.0'
6
6
  s.summary = "Visit tracking from Apache or IIS web log files"
7
7
  s.description = "This module provides a set of classes to support the parsing of web log files and
8
8
  the creation of visits from the individual parsed web log records.
@@ -17,5 +17,5 @@ spec = Gem::Specification.new do |s|
17
17
  s.homepage = "http://www.vizitrax.com"
18
18
  s.email = "al.kivi@yahoo.com"
19
19
  s.has_rdoc = true
20
- s.required_ruby_version = '>= 1.8.2'
20
+ s.required_ruby_version = '>= 1.8.7'
21
21
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vizi_tracker
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 15
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 3
8
+ - 4
9
9
  - 0
10
- version: 0.3.0
10
+ version: 0.4.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Al Kivi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-01-09 00:00:00 -05:00
18
+ date: 2012-05-05 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -102,6 +102,8 @@ files:
102
102
  - Rakefile
103
103
  - log/parse.log
104
104
  - log/system.log
105
+ - log/system.log.20120428
106
+ - log/parse.log.20120428
105
107
  - test/parser_test.rb
106
108
  - test/test_helper.rb
107
109
  - config/logger.yml
@@ -109,6 +111,7 @@ files:
109
111
  - config/logger_sample.yml
110
112
  - config/logger_apache.yml
111
113
  - testit.rb
114
+ - vizi_tracker-0.4.0.gem
112
115
  - vizi_tracker.gemspec
113
116
  has_rdoc: true
114
117
  homepage: http://www.vizitrax.com
@@ -124,12 +127,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
124
127
  requirements:
125
128
  - - ">="
126
129
  - !ruby/object:Gem::Version
127
- hash: 51
130
+ hash: 57
128
131
  segments:
129
132
  - 1
130
133
  - 8
131
- - 2
132
- version: 1.8.2
134
+ - 7
135
+ version: 1.8.7
133
136
  required_rubygems_version: !ruby/object:Gem::Requirement
134
137
  none: false
135
138
  requirements: