vizi_tracker 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/config/logger.yml CHANGED
@@ -1,49 +1,39 @@
1
- # Configuration data for logger
2
- #max_rec_count: 100
1
+ # basic configuration data ...................................
2
+ max_rec_count: 99999
3
+ sub_directory: data/
3
4
  visit_timeout: 1200
4
- summary_flag: true
5
- convert_to_lower_case: true
6
- log_level: warn
7
5
  #log_level options are debug, info, warn, error, fatal
8
- homepage: /home.aspx
9
- #homepage: /
10
- accept_only_homepage: true
11
- page_urls:
12
- - aspx
13
- - asp
14
- hide_urls:
15
- - css
16
- - js
17
- - gif
18
- - swf
19
- - ico
20
- - chart
21
- - robots
6
+ log_level: info
7
+ log_record_counts: true
8
+ convert_to_lower_case: true
9
+ url_stem: /
10
+ accept_only_url_stem: false
11
+ include_urls:
12
+ - .htm
13
+ - .pdf
14
+ - home.aspx
15
+ exclude_urls:
16
+ - .html
17
+ # sitemap_match: true
22
18
  hostname: www.sigma-systems.com
23
- drop_refers_by_hostname: true
19
+ drop_refers_by_hostname: false
20
+ # output_where_rank_over: 2
24
21
  use_local_time: true
25
- download_page_number: 45
26
22
  drop_ips:
23
+ - 10.131.0.4
24
+ - 76.12.171.4
27
25
  - 76.12.185.100
28
26
  spider_ips:
29
27
  - 66.98.254.55
30
- - 64.208.168.252
31
- - 64.235.108.183
32
- - 76.2.144.115
33
- - 66.98.254.236
34
- - 202.108.22.132
35
- - 89.122.29.77
36
- - 95.174.93.222
37
- - 66.55.37.179
38
- - 198.45.18.20
39
- - 38.104.227.3
28
+ # post processing instructions ..............................
29
+ output_where_rank_over: 2
30
+ sitemap_match: true
31
+ # spider and user agents ....................................
40
32
  spider_names:
41
33
  - bot
42
34
  - spider
43
35
  - slurp
44
36
  - root.exe
45
- - .dll
46
- - slurp
47
37
  - looksmart
48
38
  - nutchsvc
49
39
  - iconsurf
@@ -55,15 +45,63 @@ spider_names:
55
45
  - konsqueror
56
46
  - crawler
57
47
  - searchme
58
- - java/1.6.0_04
48
+ - findlinks
59
49
  - scoutjet
60
50
  - yeti
61
51
  - yandex
62
- # convert urls to assigned numbers where numbers cannot be parsed from url
63
- assigned_numbers:
64
- - /visit/index,1
65
- - /visit/show/,2
66
- - /visit/vcardedit/,3
67
- - /visit/showmap_na,4
68
- - /visit/showmap_row,5
69
- match_page_numbers: false
52
+ - fetch
53
+ - nutch
54
+ - sleuth
55
+ - globalspec
56
+ - openurl
57
+ - python
58
+ - acoon
59
+ - itim
60
+ - metauri
61
+ - extractor
62
+ - exractor
63
+ - pagegetter
64
+ - linkdex
65
+ - website
66
+ - xfruits
67
+ - binlar
68
+ - abacus
69
+ - anemone
70
+ - ichiro
71
+ - scraping
72
+ - xpymep
73
+ - swish
74
+ - sitemaps
75
+ - yahoocachesystem
76
+ - sexy
77
+ - .dll
78
+ - java/1
79
+ usual_agents:
80
+ - mozilla
81
+ - opera
82
+ - chrome
83
+ - lynx
84
+ - nokia
85
+ - blackberry
86
+ - sharp
87
+ - docomo
88
+ - curl
89
+ - facebook
90
+ - microsoft
91
+ - securepoint
92
+ - wordpress
93
+ # advanced: db lookups and csv ..................................
94
+ whois_lookup: true
95
+ use_db_lookup: true
96
+ use_db_visits: true
97
+ # create_csv_file: false
98
+ # advanced: email notification ..................................
99
+ admin_email: ...
100
+ user_email: ..
101
+ email_server: ...
102
+ send_logmsg_admin: ...
103
+ # advanced: google docs connection ..............................
104
+ use_gdocs: ...
105
+ gdocs_user: ...
106
+ gdocs_password: ...
107
+ gdocs_key: ...
@@ -18,30 +18,30 @@ module Vizi
18
18
  # format string char => [:symbol to use, /regex to use when matching against log/]
19
19
  'h' => [:ip, /\d+\.\d+\.\d+\.\d+/], # apache and IIS: called c-ip in IIS
20
20
  'p' => [:sip, /\d+\.\d+\.\d+\.\d+/], # IIS:
21
- 'g' => [:auth, /\S*/], # apache:
22
- 'u' => [:username, /\S*/], # apache and IIS: called cs-username in IIS
23
- 't' => [:dtstring, /\[.*?\]/], # apache: one field with date and time
21
+ 'g' => [:auth, /\S*/], # apache:
22
+ 'u' => [:username, /\S*/], # apache and IIS: called cs-username in IIS
23
+ 't' => [:dtstring, /\[.*?\]/], # apache: one field with date and time
24
24
  'd' => [:datestring, /\d+\-\d+\-\d+/], # IIS:
25
25
  'e' => [:timestring, /\d+\:\d+\:\d+/], # IIS:
26
- 'r' => [:request, /.*?/], # apache: includes both csmethod and csuristem
27
- 'm' => [:csmethod, /\w*?/], # IIS:
28
- 'w' => [:csuristem, /\S*/], # IIS:
29
- 's' => [:status, /\d+/], # apache and IIS: is called sc_status in IIS
30
- 'b' => [:bytecount, /-|\d+/], # apache and IIS: is called cs_bytes in IIS
31
- 'v' => [:domain, /.*?/], # apache and IIS: is c-computername in IIS
32
- 'i' => [:header_lines, /.*?/], # apache: transforms to useragent or referer or cookies
33
- 'a' => [:useragent, /\S*/], # IIS:
34
- 'j' => [:referer, /\S*/], # IIS:
35
- 'k' => [:cscookie, /\d+/], # IIS:
36
- 'q' => [:csuriquery, /.*/], # IIS:
37
- 'y' => [:csbytes, /d+/], # IIS:
38
- 'o' => [:sport, /\d+/], # IIS:
39
- 'x' => [:scsubstatus, /\d+/], # IIS:
40
- 'z' => [:cshost, /\d+/], # IIS:
41
- 'l' => [:win32status, /\d+/], # IIS:
42
- 'n' => [:timetaken, /\d+/], # IIS:
43
- 'c' => [:comment, /^#/], # IIS: comment line identifier
44
- 'f' => [:fields, /^#Fields:/] # IIS: field line identifier
26
+ 'r' => [:request, /.*?/], # apache: includes both csmethod and csuristem
27
+ 'm' => [:csmethod, /\w*?/], # IIS:
28
+ 'w' => [:csuristem, /\S*/], # IIS:
29
+ 's' => [:status, /\d+/], # apache and IIS: is called sc_status in IIS
30
+ 'b' => [:bytecount, /-|\d+/], # apache and IIS: is called cs_bytes in IIS
31
+ 'v' => [:domain, /.*?/], # apache and IIS: is c-computername in IIS
32
+ 'i' => [:header_lines, /.*?/], # apache: transforms to useragent or referer or cookies
33
+ 'a' => [:useragent, /\S*/], # IIS:
34
+ 'j' => [:referer, /\S*/], # IIS:
35
+ 'k' => [:cscookie, /\d+/], # IIS:
36
+ 'q' => [:csuriquery, /.*/], # IIS:
37
+ 'y' => [:csbytes, /d+/], # IIS:
38
+ 'o' => [:sport, /\d+/], # IIS:
39
+ 'x' => [:scsubstatus, /\d+/], # IIS:
40
+ 'z' => [:cshost, /\d+/], # IIS:
41
+ 'l' => [:win32status, /\d+/], # IIS:
42
+ 'n' => [:timetaken, /\d+/], # IIS:
43
+ 'c' => [:comment, /^#/], # IIS: comment line identifier
44
+ 'f' => [:fields, /^#Fields:/] # IIS: field line identifier
45
45
  }
46
46
 
47
47
  # This method initializes the LogFormat object with fieldnames and log formats
@@ -106,7 +106,7 @@ module Vizi
106
106
  'cs-uri-query' => 'q',
107
107
  'cs(Referer)' => 'j', # internal shortened to referer
108
108
  'cs(User-Agent)' => 'a', # internal shortened to useragent
109
- 'cs(Cookie)' => 'k', # internal shortened to cscookie
109
+ 'cs(Cookie)' => 'k', # internal shortened to cscookie
110
110
  's-port' => 'o',
111
111
  'cs-host' => 'z',
112
112
  'sc-substatus' => 'x',
@@ -120,20 +120,19 @@ module Vizi
120
120
  #@@log = ActiveRecord::Base.logger
121
121
 
122
122
  # This method initialises LogParser object and loads the configurable logger control items
123
- def initialize(drop_ips, spider_ips, spider_names, page_urls, hide_urls, homepage, accept_only_homepage,
124
- hostname, drop_refers_by_hostname, use_local_time, assigned_numbers, match_page_numbers)
123
+ def initialize(drop_ips, spider_ips, spider_names, include_urls, exclude_urls, url_stem, accept_only_url_stem,
124
+ hostname, drop_refers_by_hostname, usualagents, use_local_time)
125
125
  @drops = drop_ips
126
126
  @sips = spider_ips
127
127
  @snames = spider_names
128
- @page_urls = page_urls
129
- @hide_urls = hide_urls
130
- @homepage = homepage
131
- @accept_only_homepage = accept_only_homepage
128
+ @include_urls = include_urls
129
+ @exclude_urls = exclude_urls
130
+ @url_stem = url_stem
131
+ @accept_only_url_stem = accept_only_url_stem
132
132
  @hostname = hostname
133
133
  @drop_refers_by_hostname = drop_refers_by_hostname
134
+ @usualagents = usualagents
134
135
  @use_local_time = use_local_time
135
- @assigned_numbers = assigned_numbers
136
- @match_page_numbers = match_page_numbers
137
136
  @log_format = []
138
137
  initialize_known_formats
139
138
  @parselog = Logger.new('./log/parse.log', shift_age = 'weekly')
@@ -174,7 +173,7 @@ module Vizi
174
173
  hit = nil
175
174
  i = 0
176
175
  while i < fldarray.length
177
- hit = field.index(fldarray[i])
176
+ hit = field.index(fldarray[i])
178
177
  break if hit
179
178
  i = i + 1
180
179
  end
@@ -199,6 +198,9 @@ module Vizi
199
198
 
200
199
  # apache files ... regex the file to determine logformat name
201
200
  # IIS files ... parse the fields string to determine the file contents
201
+ # :p_linetype ... line is a (C)ontrol line, (F)ield line or a good (V)isitor line
202
+ # :p_pageflag ... (Y)es is a valid page or (N)ot
203
+ # :p_visitortype ... (H)uman, (S)pider, (D)ropped or (-) Not relevant
202
204
  def parse_line(line, logformat)
203
205
  if logformat != nil
204
206
  log_format = logformat # get log_format string
@@ -219,7 +221,7 @@ module Vizi
219
221
  parsed_data[:p_logformat] = logformat
220
222
  parsed_data[:p_visitortype] = "H" # set default visitor type (H)uman
221
223
  parsed_data[:p_linetype] = "V" # linetype is (V)isitors
222
- parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # reset if a comment line
224
+ parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # reset if a comment line
223
225
  if @format_name.to_s == "w3c_f" # IIS file name ... generic
224
226
  @format = build_format(line) # parse fields to get log_format
225
227
  temp_format = Vizi::LogFormat.new(:temp, @format) # create temp format
@@ -252,28 +254,39 @@ module Vizi
252
254
 
253
255
  if parsed_data[:request]
254
256
  # splitrequest = parsed_data[:request].gsub("/", " ").split
255
- splitrequest = parsed_data[:request].split(' ')
257
+ splitrequest = parsed_data[:request].split(' ')
256
258
  parsed_data[:csuristem] = splitrequest[1]
257
259
  end
258
260
 
259
- # Now classify visitortype based on logger yml rules ...
260
-
261
- parsed_data[:p_pageflag] = false
262
- if @accept_only_homepage
263
- #p @homepage
264
- #p parsed_data[:csuristem]
265
- parsed_data[:p_pageflag] = true if parsed_data[:csuristem].downcase.index(@homepage) == 0
261
+ # Now determine visitortype based on logger yml rules ...
262
+ parsed_data[:p_pageflag] = "N"
263
+ if @accept_only_url_stem # indicates that url_stem must always appear at start of csuristem
264
+ parsed_data[:p_pageflag] = "Y" if parsed_data[:csuristem].downcase.index(@url_stem) == 0
266
265
  else
267
- parsed_data[:p_pageflag] = true if match_partial(parsed_data[:csuristem], @page_urls)
268
- end
269
- parsed_data[:p_pageflag] = false if @hide_urls and match_partial(parsed_data[:csuristem], @hide_urls)
270
-
266
+ if parsed_data[:csuristem].downcase == @url_stem
267
+ parsed_data[:p_pageflag] = "Y"
268
+ else
269
+ if @include_urls
270
+ parsed_data[:p_pageflag] = "Y" if match_partial(parsed_data[:csuristem].downcase, @include_urls)
271
+ end
272
+ if @exclude_urls
273
+ parsed_data[:p_pageflag] = "N" if match_partial(parsed_data[:csuristem].downcase, @exclude_urls)
274
+ end
275
+ end
276
+ end
277
+
278
+ parsed_data[:p_visitortype] = "D" if parsed_data[:status] == "404"
271
279
  parsed_data[:p_visitortype] = "D" if @drops and @drops.index(parsed_data[:ip])
272
- parsed_data[:p_visitortype] = "S" if @sips and@sips.index(parsed_data[:ip])
273
-
274
- if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent], @snames)
280
+ parsed_data[:p_visitortype] = "S" if @sips and @sips.index(parsed_data[:ip])
281
+ if parsed_data[:useragent] and @snames and match_partial(parsed_data[:useragent].downcase, @snames)
275
282
  parsed_data[:p_visitortype] = "S"
276
283
  end
284
+ parsed_data[:p_visitortype] = "S" if parsed_data[:useragent] == "-"
285
+ parsed_data[:p_usualagent] = "Y"
286
+ parsed_data[:p_usualagent] = "N" if parsed_data[:p_visitortype] != "S" and not match_partial(parsed_data[:useragent].downcase, @usualagents)
287
+
288
+ parsed_data[:p_returnhit] = "N"
289
+ parsed_data[:p_returnhit] = "Y" if parsed_data[:status] == "304"
277
290
 
278
291
  if parsed_data[:referer]
279
292
  y = (/(search\?\S*?[pq])=(\S*?)(&)/).match(parsed_data[:referer])
@@ -283,11 +296,8 @@ module Vizi
283
296
  end
284
297
  end
285
298
 
286
- if @match_page_numbers and parsed_data[:p_pageflag]
287
- parsed_data[:p_pageid] = find_assigned_number(parsed_data[:csuristem], @assigned_numbers)
288
- # p ">>" + parsed_data[:p_pageid].to_s if parsed_data[:p_pageid]
289
- end
290
-
299
+ parsed_data[:p_pdfstem] = nil
300
+ parsed_data[:p_pdfstem] = parsed_data[:csuristem].downcase if parsed_data[:csuristem].downcase.index("/pdfs/") == 0
291
301
  end
292
302
  parsed_data
293
303
  end
@@ -297,80 +307,203 @@ module Vizi
297
307
  # Visits are determined on the basis of the IP Address hits during a timed interval
298
308
  #
299
309
  class Visit
300
- attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pages, :robots, :visitortype, :searchphrase
301
-
302
- # This method calculates the rank
303
- def calculate_rank(pages, duration, visitortype)
304
- ranktotal = [pages,9].min*10 + [duration/60,9].min
305
- rank = ((ranktotal+10)/20).round
306
- rank = 1 if rank == 0
307
- rank = -rank if visitortype == "S"
308
- rank = 0 if visitortype == "D"
309
- return rank
310
- end
311
-
312
- # This method extracts the name of a downloaded file from the csuriquery value
313
- def get_download(csuriquery, timetaken)
314
- download = nil
315
- if timetaken.to_i > 4000
316
- split_uri = csuriquery.split("file=")
317
- download = split_uri[1]
318
- p download
319
- end
320
- return download
321
- end
310
+ attr_accessor :ip, :start_dt, :end_dt, :expire_dt, :duration, :hits, :pgcount, :robots, :vtype,
311
+ :returnhit, :searchphrase, :orgname, :city, :country, :region, :grouphash, :group, :groupcount, :pdfstem, :pdflist
322
312
 
323
- # The method completes the initialization and update methods
324
- def add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
325
- @searchphrase = p_searchphrase if p_searchphrase
326
- @rank = calculate_rank(@pages, @duration, @visitortype)
327
- @pageids = []
328
- if p_pageid
329
- @pageids << p_pageid
330
- else
331
- z=(/(PageID)=(\d+)/).match(csuriquery)
332
- if z
333
- p_pageid = z[2].to_i
334
- @pageids << p_pageid
335
- @download_file = get_download(csuriquery, timetaken) if p_pageid == @@download_page_number
336
- end
337
- end
338
- end
339
-
340
- # This method initializes the Visit object. Load object with parsed data
341
- def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
313
+ # This method initializes the Visit object. Loads object with parsed data from first captured line
314
+ def initialize(ip, log_dt, csuristem, csuriquery, timetaken, p_visitortype, p_pageflag, p_returnhit, p_pdfstem, visit_timeout)
342
315
  @ip = ip
343
316
  @start_dt = log_dt
344
- @expire_dt = @start_dt + @@visit_timeout
317
+ @expire_dt = @start_dt + visit_timeout
345
318
  @end_dt = @start_dt
346
319
  @duration = 0
347
320
  @hits = 0
348
- @pages = 0
349
- @pages = 1 if p_pageflag
350
- @visitortype = p_visitortype
351
- @visitortype = "S" if csuristem == "/robots.txt"
321
+ @pgcount = 0
322
+ @pgcount = 1 if p_pageflag == "Y"
323
+ @vtype = p_visitortype
324
+ @vtype = "S" if csuristem == "/robots.txt"
325
+ @returnhit = p_returnhit
326
+ @orgname = ""
327
+ @city = ""
328
+ @country = ""
329
+ @region = ""
330
+ @grouphash = Hash.new
331
+ @group = ""
332
+ @groupcount = 0
333
+ @orgmatch = ""
352
334
  @searchphrase = ""
353
- add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
335
+ @pdfstem = p_pdfstem
336
+ @pdflist = Array.new
337
+ @pdflist << @pdfstem if not @pdfstem.nil?
338
+ @rank = calculate_rank(@pgcount, @duration, @vtype, @pdflist.length)
339
+ end
340
+
341
+ # This method calculates the rank
342
+ def calculate_rank(pgcount, duration, visitortype, pdfhits)
343
+ if pgcount < 4
344
+ rank = pgcount
345
+ elsif pgcount > 10
346
+ rank = 5
347
+ else
348
+ rank = 4
349
+ end
350
+ rank = 2 if duration < 21
351
+ rank = 1 if duration < 11
352
+ rank = 0 if duration < 11 and pgcount > 40
353
+ rank = 0 if pgcount > duration/5
354
+ rank = 0 if duration == 0
355
+ rank = 0 if visitortype == "D"
356
+ rank = rank + 1 if pdfhits > 0
357
+ rank = 5 if rank > 5
358
+ rank = -rank if visitortype == "S"
359
+ return rank
354
360
  end
355
361
 
356
362
  # This method updates the Visit object with new parsed data
357
- def update(end_dt, csuriquery, timetaken, p_visitortype, p_pageflag, p_searchphrase, p_pageid)
363
+ def update(end_dt, p_visitortype, p_pageflag, p_returnhit, p_pdfstem)
358
364
  @end_dt = end_dt
359
365
  @duration = (@end_dt - @start_dt).to_i
360
366
  @hits = @hits + 1
361
- @pages = @pages + 1 if p_pageflag
362
- @visitortype = p_visitortype if @visitortype == "H"
363
- add_fields(csuriquery, timetaken, p_searchphrase, p_pageid)
367
+ @pgcount = @pgcount + 1 if p_pageflag == "Y"
368
+ @vtype = p_visitortype if @vtype == "H"
369
+ @returnhit = p_returnhit if @returnhit == "N"
370
+ @pdfstem = p_pdfstem
371
+ @pdflist << @pdfstem if @pdfstem and @pdflist.index(@pdfstem).nil?
372
+ @rank = calculate_rank(@pgcount, @duration, @vtype, @pdflist.length)
373
+ end
374
+
375
+ # This method updates the Visit object with results of the whois lookup
376
+ def add_details(orgname, city, country, region)
377
+ @orgname = orgname
378
+ @city = city
379
+ @country = country
380
+ @region = region
381
+ end
382
+
383
+ def getip
384
+ @ip
385
+ end
386
+
387
+ # Get rank from object
388
+ def getrank
389
+ @rank
390
+ end
391
+
392
+ # Add count to group
393
+ def increment_group(group)
394
+ @grouphash[group] = @grouphash[group].to_i + 1
395
+ end
396
+
397
+ # Classify the visit based on various factors
398
+ def classify_visit
399
+ @group = "none"
400
+ @groupcount = 0
401
+ if @grouphash.length > 0
402
+ z = @grouphash.invert.sort
403
+ zlast = z[z.length-1]
404
+ @group = zlast[1]
405
+ @groupcount = z.length
406
+ end
407
+ case @group
408
+ when "news", "company", "resources"
409
+ @persona = "Analyst"
410
+ when "home", "contacts"
411
+ @persona = "Tirekicker"
412
+ when "products", "solutions"
413
+ @persona = "Suspect"
414
+ when "careers"
415
+ @persona = "Jobhunter"
416
+ when "evolve"
417
+ @persona = "Prospect"
418
+ when "partners"
419
+ @persona = "Barney"
420
+ when "customers"
421
+ @persona = "Poacher"
422
+ else
423
+ @persona = "None"
424
+ end
425
+ @persona = "Bouncer" if @rank < 3
426
+ @persona = "Prospect" if @persona == "Suspect" and ((@rank == 4 and @returnhit == "Y") or @rank == 5)
364
427
  end
365
428
 
429
+ # This method looks to match the orgname against the orgs file
430
+ def matchorg(orgs)
431
+ @orgmatch = ""
432
+ orgs.each {|group, names|
433
+ names.each { |n|
434
+ if @orgname.index(n)
435
+ @orgmatch = group
436
+ break
437
+ end
438
+ }
439
+ }
440
+ end
441
+
442
+ # Print short output with key fields from the object
366
443
  def sendoutput
367
- #if @rank > 0
368
- iplong = @ip.to_s+" "
369
- p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@visitortype+" Hits> "+@hits.to_s+" Pgs> "+@pages.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
370
- p" Phrase> "+@searchphrase if @searchphrase.length > 0
371
- p @pageids if @pageids.length > 0
372
- #end
444
+ iplong = @ip+" "
445
+ p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@vtype+" Pgs> "+@pgcount.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s
446
+ end
447
+
448
+ # Print long output with key fields from the object
449
+ def printoutput
450
+ iplong = @ip+" "
451
+ p ">"+iplong[0..14]+" "+@start_dt.to_s[0..18]+" "+@vtype+" Pgs> "+@pgcount.to_s+" Dur> "+@duration.to_s+" Rank> "+@rank.to_s+" Org> "+@orgname+" City> "+@city+" Country> "+@country+" Region> "+@region
452
+ end
453
+
454
+ #def createcsvheader(fileout)
455
+ #fileout.puts("ipaddress, date, time, vtype, pgcount, duration, rank, returnhit, orgname, city, country, region")
456
+ #end
457
+
458
+ #def createcsvoutput(fileout)
459
+ #iplong = @ip+" "
460
+ #fileout.puts(iplong[0..14]+","+@start_dt.to_s[0..10]+","+@start_dt.to_s[11..18]+","+@vtype+","+@pgcount.to_s+","+@duration.to_s+","+@rank.to_s+","+@returnhit+","+@orgname+","+@city+","+@country+","+@region)
461
+ #end
462
+
463
+ # Store output to Google docs spreadsheet
464
+ def gdocsoutput (ws, row_count)
465
+ r = row_count+2
466
+ ws[r,1] = @ip
467
+ ws[r,2] = @start_dt.strftime("%m/%d/%Y")
468
+ ws[r,3] = @start_dt.strftime("%I:%M%p")
469
+ ws[r,4] = @pgcount
470
+ ws[r,5] = @duration
471
+ ws[r,6] = (@pdflist.length)
472
+ ws[r,7] = @rank
473
+ ws[r,8] = @orgname
474
+ ws[r,9] = @city
475
+ ws[r,10] = @country
476
+ ws[r,11] = @region
477
+ ws[r,12] = @returnhit
478
+ ws[r,13] = @persona
479
+ ws[r,14] = @group
480
+ ws[r,15] = @groupcount
481
+ ws[r,16] = @orgmatch
482
+ ws.save()
373
483
  end
484
+
485
+ # Save output to database file
486
+ def saveoutput
487
+ @vzvisit = Vzvisit.new
488
+ @vzvisit[:ipaddr] = @ip
489
+ @vzvisit[:vdatetime] = @start_dt
490
+ @vzvisit[:vtype] = @vtype
491
+ @vzvisit[:pgcount] = @pgcount
492
+ @vzvisit[:duration] = @duration
493
+ @vzvisit[:rank] = @rank
494
+ @vzvisit[:orgname] = @orgname
495
+ @vzvisit[:city] = @city
496
+ @vzvisit[:country] = @country
497
+ @vzvisit[:region] = @region
498
+ @vzvisit[:returnhit] = @returnhit
499
+ @vzvisit[:group] = @group
500
+ @vzvisit[:groupcount] = @groupcount
501
+ @vzvisit[:persona] = @persona
502
+ @vzvisit[:orgmatch] = @orgmatch
503
+ @vzvisit[:pdfhits] = @pdflist.length
504
+ @vzvisit.save
505
+ end
506
+
374
507
  end
375
508
 
376
509
  # This class creates and manages a list to keep track of the visits that are in process (cached)
data/log/parse.log CHANGED
@@ -1,79 +1,25 @@
1
- # Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
2
- W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
3
-
4
- W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
- W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
6
-
7
- W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
- W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
9
-
10
- W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
- W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
12
-
13
- W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
- W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
15
-
16
- W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
- W, [2011-06-05T22:08:53.528629 #560] WARN -- :
18
-
19
- W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
- W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
-
22
- W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
- W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
-
25
- W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
26
- W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
27
-
28
- W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
29
- W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
30
-
31
- W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
32
- W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
33
-
34
- W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
35
- W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
36
-
37
- W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
38
- W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
39
-
40
- W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
41
- W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
42
-
43
- W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
44
- W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
45
-
46
- W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
47
- W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
48
-
49
- W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
50
- W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
51
-
52
- W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
53
- W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
54
-
55
- W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
56
- W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
57
-
58
- W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
59
- W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
60
-
61
- W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
62
- W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
63
-
64
- W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
65
- W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
66
-
67
- W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
68
- W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
1
+ # Logfile created on Sat May 05 17:30:06 -0400 2012 by logger.rb/22285
2
+ W, [2012-05-05T17:30:06.758356 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
69
3
 
70
- W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
71
- W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
4
+ W, [2012-05-05T17:30:06.758765 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
+ W, [2012-05-05T17:30:08.532729 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
72
6
 
73
- W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
74
- W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
7
+ W, [2012-05-05T17:30:08.532918 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
+ W, [2012-05-05T17:30:09.348402 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
75
9
 
76
- W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
77
- W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
10
+ W, [2012-05-05T17:30:09.348559 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
+ W, [2012-05-05T17:30:10.385291 #2592] WARN -- : #Software: Microsoft Internet Information Services 7.0
78
12
 
79
- W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
13
+ W, [2012-05-05T17:30:10.385392 #2592] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
+ W, [2012-05-05T17:30:43.757483 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
15
+
16
+ W, [2012-05-05T17:30:43.757621 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
+ W, [2012-05-05T17:30:45.519507 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
18
+
19
+ W, [2012-05-05T17:30:45.519672 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
+ W, [2012-05-05T17:30:46.357673 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
+
22
+ W, [2012-05-05T17:30:46.357847 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
+ W, [2012-05-05T17:30:47.451169 #2593] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
+
25
+ W, [2012-05-05T17:30:47.451274 #2593] WARN -- : Found comment lines embedded in the log file ... resetting to nil
@@ -0,0 +1,79 @@
1
+ # Logfile created on 2011-06-05 21:46:32 -0400 by logger.rb/25413
2
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
3
+
4
+ W, [2011-06-05T21:46:32.006263 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
5
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
6
+
7
+ W, [2011-06-05T21:46:32.271464 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
8
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
9
+
10
+ W, [2011-06-05T21:46:32.380664 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
11
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : #Software: Microsoft Internet Information Services 7.0
12
+
13
+ W, [2011-06-05T21:46:32.521064 #3444] WARN -- : Found comment lines embedded in the log file ... resetting to nil
14
+ W, [2011-06-05T22:07:49.183949 #4560] WARN -- :
15
+
16
+ W, [2011-06-05T22:07:49.184949 #4560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
17
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- :
18
+
19
+ W, [2011-06-05T22:08:53.528629 #560] WARN -- : Found comment lines embedded in the log file ... resetting to nil
20
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
21
+
22
+ W, [2011-06-05T22:37:51.132014 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
23
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
24
+
25
+ W, [2011-06-05T22:37:51.368028 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
26
+ W, [2011-06-05T22:37:51.482034 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
27
+
28
+ W, [2011-06-05T22:37:51.483035 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
29
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : #Software: Microsoft Internet Information Services 7.0
30
+
31
+ W, [2011-06-05T22:37:51.604041 #3476] WARN -- : Found comment lines embedded in the log file ... resetting to nil
32
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
33
+
34
+ W, [2011-06-05T22:58:49.429985 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
35
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
36
+
37
+ W, [2011-06-05T22:58:49.708001 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
38
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
39
+
40
+ W, [2011-06-05T22:58:49.854009 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
41
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : #Software: Microsoft Internet Information Services 7.0
42
+
43
+ W, [2011-06-05T22:58:49.977016 #2516] WARN -- : Found comment lines embedded in the log file ... resetting to nil
44
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
45
+
46
+ W, [2011-06-10T20:59:18.660550 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
47
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
48
+
49
+ W, [2011-06-10T20:59:18.910150 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
50
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
51
+
52
+ W, [2011-06-10T20:59:19.050551 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
53
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : #Software: Microsoft Internet Information Services 7.0
54
+
55
+ W, [2011-06-10T20:59:19.175351 #3268] WARN -- : Found comment lines embedded in the log file ... resetting to nil
56
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
57
+
58
+ W, [2011-06-10T21:23:46.688425 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
59
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
60
+
61
+ W, [2011-06-10T21:23:46.969225 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
62
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
63
+
64
+ W, [2011-06-10T21:23:47.094026 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
65
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : #Software: Microsoft Internet Information Services 7.0
66
+
67
+ W, [2011-06-10T21:23:47.218826 #756] WARN -- : Found comment lines embedded in the log file ... resetting to nil
68
+ W, [2011-06-10T22:04:33.846288 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
69
+
70
+ W, [2011-06-10T22:04:33.846398 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
71
+ W, [2011-06-10T22:04:34.038272 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
72
+
73
+ W, [2011-06-10T22:04:34.038371 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
74
+ W, [2011-06-10T22:04:34.123624 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
75
+
76
+ W, [2011-06-10T22:04:34.123712 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
77
+ W, [2011-06-10T22:04:34.221506 #2361] WARN -- : #Software: Microsoft Internet Information Services 7.0
78
+
79
+ W, [2011-06-10T22:04:34.221596 #2361] WARN -- : Found comment lines embedded in the log file ... resetting to nil
data/log/system.log CHANGED
@@ -1,66 +1,12 @@
1
- # Logfile created on 2011-06-05 21:46:30 -0400 by logger.rb/25413
2
- I, [2011-06-05T21:46:30.867462 #3444] INFO -- : starting ... >>> 2011-06-05 21:46:30 -0400
3
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Record count is 5458
4
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Hit count is 5438
5
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Page count is 506
6
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Total visit count is 461
7
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Human visit count is 119
8
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Drop visit count is 78
9
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Spider visit count is 264
10
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Batch processing time 1.677403
11
- I, [2011-06-05T21:46:32.576065 #3444] INFO -- : ending ... >>> 2011-06-05 21:46:32 -0400
12
- I, [2011-06-05T21:46:53.028704 #5108] INFO -- : starting ... >>> 2011-06-05 21:46:53 -0400
13
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Record count is 30474
14
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Hit count is 30474
15
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Page count is 0
16
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Total visit count is 4484
17
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Human visit count is 4227
18
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Drop visit count is 0
19
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Spider visit count is 257
20
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Batch processing time 13.675125
21
- I, [2011-06-05T21:47:06.719429 #5108] INFO -- : ending ... >>> 2011-06-05 21:47:06 -0400
22
- I, [2011-06-05T21:47:44.613806 #1112] INFO -- : starting ... >>> 2011-06-05 21:47:44 -0400
23
- I, [2011-06-05T21:48:01.451639 #4252] INFO -- : starting ... >>> 2011-06-05 21:48:01 -0400
24
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Record count is 11870
25
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Hit count is 11870
26
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Page count is 0
27
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Total visit count is 1
28
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Human visit count is 1
29
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Drop visit count is 0
30
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Spider visit count is 0
31
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Batch processing time 3.744208
32
- I, [2011-06-05T21:48:05.195847 #4252] INFO -- : ending ... >>> 2011-06-05 21:48:05 -0400
33
- I, [2011-06-05T21:48:20.829574 #4544] INFO -- : starting ... >>> 2011-06-05 21:48:20 -0400
34
- I, [2011-06-05T21:50:44.333188 #960] INFO -- : starting ... >>> 2011-06-05 21:50:44 -0400
35
- I, [2011-06-05T21:51:56.322305 #4712] INFO -- : starting ... >>> 2011-06-05 21:51:56 -0400
36
- I, [2011-06-05T21:53:42.440375 #4832] INFO -- : starting ... >>> 2011-06-05 21:53:42 -0400
37
- I, [2011-06-05T22:07:49.079943 #4560] INFO -- : starting ... >>> 2011-06-05 22:07:49 -0400
38
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Record count is 47
39
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Hit count is 42
40
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Page count is 0
41
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Total visit count is 1
42
- I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Human visit count is 1
43
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Drop visit count is 0
44
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Spider visit count is 0
45
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Batch processing time 0.105006
46
- I, [2011-06-05T22:07:49.185949 #4560] INFO -- : ending ... >>> 2011-06-05 22:07:49 -0400
47
- I, [2011-06-05T22:08:53.515628 #560] INFO -- : starting ... >>> 2011-06-05 22:08:53 -0400
48
- I, [2011-06-05T22:08:53.529629 #560] INFO -- : Record count is 47
49
- I, [2011-06-05T22:08:53.529629 #560] INFO -- : Hit count is 42
50
- I, [2011-06-05T22:08:53.529629 #560] INFO -- : Page count is 0
51
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Total visit count is 1
52
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Human visit count is 1
53
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Drop visit count is 0
54
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Spider visit count is 0
55
- I, [2011-06-05T22:08:53.530629 #560] INFO -- : Batch processing time 0.014
56
- I, [2011-06-05T22:08:53.531629 #560] INFO -- : ending ... >>> 2011-06-05 22:08:53 -0400
57
- I, [2011-06-05T22:37:50.150958 #3476] INFO -- : starting ... >>> 2011-06-05 22:37:50 -0400
58
- I, [2011-06-05T22:37:51.664045 #3476] INFO -- : Record count is 5458
59
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Hit count is 5438
60
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Page count is 506
61
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Total visit count is 461
62
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Human visit count is 119
63
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Drop visit count is 78
64
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Spider visit count is 264
65
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Batch processing time 1.513087
66
- I, [2011-06-05T22:37:51.665045 #3476] INFO -- : ending ... >>> 2011-06-05 22:37:51 -0400
1
+ # Logfile created on Sat May 05 17:04:25 -0400 2012 by logger.rb/22285
2
+ I, [2012-05-05T17:04:25.715059 #2398] INFO -- : starting ... >>> Sat May 05 17:04:25 -0400 2012
3
+ I, [2012-05-05T17:08:58.497120 #2445] INFO -- : starting ... >>> Sat May 05 17:08:58 -0400 2012
4
+ I, [2012-05-05T17:12:10.618586 #2511] INFO -- : starting ... >>> Sat May 05 17:12:10 -0400 2012
5
+ I, [2012-05-05T17:18:56.448474 #2526] INFO -- : starting ... >>> Sat May 05 17:18:56 -0400 2012
6
+ I, [2012-05-05T17:19:48.272182 #2531] INFO -- : starting ... >>> Sat May 05 17:19:48 -0400 2012
7
+ I, [2012-05-05T17:23:04.812969 #2572] INFO -- : starting ... >>> Sat May 05 17:23:04 -0400 2012
8
+ I, [2012-05-05T17:27:53.760281 #2591] INFO -- : starting ... >>> Sat May 05 17:27:53 -0400 2012
9
+ I, [2012-05-05T17:30:00.755342 #2592] INFO -- : starting ... >>> Sat May 05 17:30:00 -0400 2012
10
+ I, [2012-05-05T17:30:10.734129 #2592] INFO -- : ending ... >>> Sat May 05 17:30:10 -0400 2012
11
+ I, [2012-05-05T17:30:37.881134 #2593] INFO -- : starting ... >>> Sat May 05 17:30:37 -0400 2012
12
+ I, [2012-05-05T17:30:47.799064 #2593] INFO -- : ending ... >>> Sat May 05 17:30:47 -0400 2012
@@ -0,0 +1,66 @@
1
+ # Logfile created on 2011-06-05 21:46:30 -0400 by logger.rb/25413
2
+ I, [2011-06-05T21:46:30.867462 #3444] INFO -- : starting ... >>> 2011-06-05 21:46:30 -0400
3
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Record count is 5458
4
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Hit count is 5438
5
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Page count is 506
6
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Total visit count is 461
7
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Human visit count is 119
8
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Drop visit count is 78
9
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Spider visit count is 264
10
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : Batch processing time 1.677403
11
+ I, [2011-06-05T21:46:32.576065 #3444] INFO -- : ending ... >>> 2011-06-05 21:46:32 -0400
12
+ I, [2011-06-05T21:46:53.028704 #5108] INFO -- : starting ... >>> 2011-06-05 21:46:53 -0400
13
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Record count is 30474
14
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Hit count is 30474
15
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Page count is 0
16
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Total visit count is 4484
17
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Human visit count is 4227
18
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Drop visit count is 0
19
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Spider visit count is 257
20
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : Batch processing time 13.675125
21
+ I, [2011-06-05T21:47:06.719429 #5108] INFO -- : ending ... >>> 2011-06-05 21:47:06 -0400
22
+ I, [2011-06-05T21:47:44.613806 #1112] INFO -- : starting ... >>> 2011-06-05 21:47:44 -0400
23
+ I, [2011-06-05T21:48:01.451639 #4252] INFO -- : starting ... >>> 2011-06-05 21:48:01 -0400
24
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Record count is 11870
25
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Hit count is 11870
26
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Page count is 0
27
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Total visit count is 1
28
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Human visit count is 1
29
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Drop visit count is 0
30
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Spider visit count is 0
31
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : Batch processing time 3.744208
32
+ I, [2011-06-05T21:48:05.195847 #4252] INFO -- : ending ... >>> 2011-06-05 21:48:05 -0400
33
+ I, [2011-06-05T21:48:20.829574 #4544] INFO -- : starting ... >>> 2011-06-05 21:48:20 -0400
34
+ I, [2011-06-05T21:50:44.333188 #960] INFO -- : starting ... >>> 2011-06-05 21:50:44 -0400
35
+ I, [2011-06-05T21:51:56.322305 #4712] INFO -- : starting ... >>> 2011-06-05 21:51:56 -0400
36
+ I, [2011-06-05T21:53:42.440375 #4832] INFO -- : starting ... >>> 2011-06-05 21:53:42 -0400
37
+ I, [2011-06-05T22:07:49.079943 #4560] INFO -- : starting ... >>> 2011-06-05 22:07:49 -0400
38
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Record count is 47
39
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Hit count is 42
40
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Page count is 0
41
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Total visit count is 1
42
+ I, [2011-06-05T22:07:49.184949 #4560] INFO -- : Human visit count is 1
43
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Drop visit count is 0
44
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Spider visit count is 0
45
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : Batch processing time 0.105006
46
+ I, [2011-06-05T22:07:49.185949 #4560] INFO -- : ending ... >>> 2011-06-05 22:07:49 -0400
47
+ I, [2011-06-05T22:08:53.515628 #560] INFO -- : starting ... >>> 2011-06-05 22:08:53 -0400
48
+ I, [2011-06-05T22:08:53.529629 #560] INFO -- : Record count is 47
49
+ I, [2011-06-05T22:08:53.529629 #560] INFO -- : Hit count is 42
50
+ I, [2011-06-05T22:08:53.529629 #560] INFO -- : Page count is 0
51
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Total visit count is 1
52
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Human visit count is 1
53
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Drop visit count is 0
54
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Spider visit count is 0
55
+ I, [2011-06-05T22:08:53.530629 #560] INFO -- : Batch processing time 0.014
56
+ I, [2011-06-05T22:08:53.531629 #560] INFO -- : ending ... >>> 2011-06-05 22:08:53 -0400
57
+ I, [2011-06-05T22:37:50.150958 #3476] INFO -- : starting ... >>> 2011-06-05 22:37:50 -0400
58
+ I, [2011-06-05T22:37:51.664045 #3476] INFO -- : Record count is 5458
59
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Hit count is 5438
60
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Page count is 506
61
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Total visit count is 461
62
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Human visit count is 119
63
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Drop visit count is 78
64
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Spider visit count is 264
65
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : Batch processing time 1.513087
66
+ I, [2011-06-05T22:37:51.665045 #3476] INFO -- : ending ... >>> 2011-06-05 22:37:51 -0400
data/testit.rb CHANGED
@@ -6,9 +6,8 @@
6
6
  #
7
7
  # Author:: Al Kivi <al.kivi@vizitrax.com>
8
8
 
9
- # Use either the library version of the code
10
- ## require './lib/vizi_tracker'
11
- # Or replace the above line to reference the gem version, if installed
9
+ require 'rubygems' # needed for ruby 1.8.7
10
+ # require '...\vizi_tracker\lib\vizi\vizi_tracker'
12
11
  require 'vizi_tracker'
13
12
 
14
13
  require 'yaml'
@@ -21,10 +20,10 @@ config = YAML.load_file("config/logger.yml")
21
20
 
22
21
  # Initialize the log parser
23
22
  parser = Vizi::LogParser.new(config["drop_ips"], config["spider_ips"],
24
- config["spider_names"], config["page_urls"], config["hide_urls"],
25
- config["homepage"], config["accept_only_homepage"],config["hostname"],
26
- config["drop_refers_by_hostname"], config["use_local_time"],
27
- config["assigned_numbers"], config["match_page_numbers"])
23
+ config["spider_names"], config["include_urls"], config["exclude_urls"],
24
+ config["url_stem"], config["accept_only_url_stem"],config["hostname"],
25
+ config["drop_refers_by_hostname"], config["usual_agents"],
26
+ config["use_local_time"])
28
27
 
29
28
  syslog = Logger.new('./log/system.log',shift_age = 'weekly')
30
29
  case config["log_level"]
@@ -57,6 +56,7 @@ File.open('./data/exlog.log', 'r') do |file|
57
56
  logformat = nil
58
57
  # Begin to parse each record
59
58
  while(line = file.gets)
59
+ p line
60
60
  parsed_data = parser.parse_line(line, logformat)
61
61
  logformat = parsed_data[:p_logformat]
62
62
  rec_count = rec_count + 1
@@ -65,30 +65,29 @@ File.open('./data/exlog.log', 'r') do |file|
65
65
  page_count = page_count + 1 if parsed_data[:p_pageflag]
66
66
  @visit=vlist.find_by_ip(parsed_data[:ip])
67
67
  if @visit.nil?
68
- vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
69
- parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase],parsed_data[:p_pageid]))
68
+ vlist.append(Vizi::Visit.new(parsed_data[:ip],parsed_data[:datetime],parsed_data[:csuristem],parsed_data[:csuriquery], parsed_data[:timetaken],
69
+ parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_returnhit],parsed_data[:p_pdfstem],config["visit_timeout"]))
70
70
  @visit=vlist.find_by_ip(parsed_data[:ip])
71
71
  visit_count = visit_count + 1
72
72
  else
73
- @visit.update(parsed_data[:datetime],parsed_data[:csuriquery],parsed_data[:timetaken],
74
- parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_searchphrase], parsed_data[:p_pageid])
73
+ @visit.update(parsed_data[:datetime],parsed_data[:p_visitortype],parsed_data[:p_pageflag],parsed_data[:p_returnhit],parsed_data[:p_pdfstem])
75
74
  end
76
75
  @visits = vlist.find_expired(@visit.start_dt)
77
76
  if @visits
78
77
  @visits.sendoutput
79
78
  vlist.delete(@visits)
80
- human_count = human_count + 1 if @visits.visitortype == "H"
81
- drop_count = drop_count + 1 if @visits.visitortype == "D"
82
- spider_count = spider_count + 1 if @visits.visitortype == "S"
79
+ human_count = human_count + 1 if @visits.vtype == "H"
80
+ drop_count = drop_count + 1 if @visits.vtype == "D"
81
+ spider_count = spider_count + 1 if @visits.vtype == "S"
83
82
  end
84
83
  break if rec_count == max_rec_count
85
84
  end
86
85
  @visits = vlist.find_all
87
86
  @visits.each {|v|
88
87
  v.sendoutput
89
- human_count = human_count + 1 if v.visitortype == "H"
90
- drop_count = drop_count + 1 if v.visitortype == "D"
91
- spider_count = spider_count + 1 if v.visitortype == "S"
88
+ human_count = human_count + 1 if v.vtype == "H"
89
+ drop_count = drop_count + 1 if v.vtype == "D"
90
+ spider_count = spider_count + 1 if v.vtype == "S"
92
91
  }
93
92
  if config["summary_flag"]
94
93
  syslog.info "Record count is "+rec_count.to_s
Binary file
data/vizi_tracker.gemspec CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
2
2
 
3
3
  spec = Gem::Specification.new do |s|
4
4
  s.name = 'vizi_tracker'
5
- s.version = '0.3.0'
5
+ s.version = '0.4.0'
6
6
  s.summary = "Visit tracking from Apache or IIS web log files"
7
7
  s.description = "This module provides a set of classes to support the parsing of web log files and
8
8
  the creation of visits from the individual parsed web log records.
@@ -17,5 +17,5 @@ spec = Gem::Specification.new do |s|
17
17
  s.homepage = "http://www.vizitrax.com"
18
18
  s.email = "al.kivi@yahoo.com"
19
19
  s.has_rdoc = true
20
- s.required_ruby_version = '>= 1.8.2'
20
+ s.required_ruby_version = '>= 1.8.7'
21
21
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vizi_tracker
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 15
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 3
8
+ - 4
9
9
  - 0
10
- version: 0.3.0
10
+ version: 0.4.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Al Kivi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-01-09 00:00:00 -05:00
18
+ date: 2012-05-05 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -102,6 +102,8 @@ files:
102
102
  - Rakefile
103
103
  - log/parse.log
104
104
  - log/system.log
105
+ - log/system.log.20120428
106
+ - log/parse.log.20120428
105
107
  - test/parser_test.rb
106
108
  - test/test_helper.rb
107
109
  - config/logger.yml
@@ -109,6 +111,7 @@ files:
109
111
  - config/logger_sample.yml
110
112
  - config/logger_apache.yml
111
113
  - testit.rb
114
+ - vizi_tracker-0.4.0.gem
112
115
  - vizi_tracker.gemspec
113
116
  has_rdoc: true
114
117
  homepage: http://www.vizitrax.com
@@ -124,12 +127,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
124
127
  requirements:
125
128
  - - ">="
126
129
  - !ruby/object:Gem::Version
127
- hash: 51
130
+ hash: 57
128
131
  segments:
129
132
  - 1
130
133
  - 8
131
- - 2
132
- version: 1.8.2
134
+ - 7
135
+ version: 1.8.7
133
136
  required_rubygems_version: !ruby/object:Gem::Requirement
134
137
  none: false
135
138
  requirements: