double_agent 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,15 @@
1
+ == Release 0.2.0 (July 30, 2011)
2
+
3
+ * Separate code into real modules
4
+
5
+ * Only load parser and resources by default
6
+
7
+ * Add subtotals to percentages_for
8
+
9
+ * Add more data parsing methods to DoubleAgent::Logs::Entry objects
10
+
11
+ * Various optimizations and stuff I forgot about
12
+
1
13
  == Release 0.1.2 (June 30, 2011)
2
14
 
3
15
  * Bugfix to #browser_sym and #os_sym returning nil on an empty or nil user agent. Should return :unknown.
data/README.rdoc CHANGED
@@ -9,20 +9,17 @@ agent strings. It is designed for parsing large sets for review or analysis.
9
9
 
10
10
  == Loading
11
11
 
12
- Double Agent is broken up into four components - core, resources, stats, logs
12
+ Double Agent is broken up into modules - Parser, Resources, Stats and Logs.
13
13
 
14
- # Load the default components (core, resources and stats)
14
+ # Load the core modules (Parser, Resources)
15
15
  require 'double_agent'
16
16
 
17
- # Load components individually (some have dependencies on others)
18
- require 'double_agent/core|resources|stats|logs'
17
+ # Load modules individually
18
+ require 'double_agent/parser|resources|stats|logs|all'
19
19
 
20
- # Load everything
21
- require 'double_agent/all'
20
+ = Parser
22
21
 
23
- = The Core Parser
24
-
25
- ua_string = "pretent I'm a user agent string for Chrome on Ubuntu"
22
+ ua_string = "I'm a user agent string for Chrome 11 on Ubuntu"
26
23
 
27
24
  DoubleAgent.browser(ua_string)
28
25
  => "Chrome 11"
@@ -41,7 +38,7 @@ See the DoubleAgent module for more.
41
38
  = Resources
42
39
 
43
40
  DoubleAgent::Resource is a mix-in for objects with a user_agent method or attribute.
44
- It gives that class's objects all of the above methods.
41
+ It gives that class's objects all of the above methods and more.
45
42
 
46
43
  class Login
47
44
  include DoubleAgent::Resource
@@ -61,29 +58,34 @@ It gives that class's objects all of the above methods.
61
58
 
62
59
  = Stats
63
60
 
64
- Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::percentages_for.
61
+ Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::Stats::percentages_for.
65
62
 
66
63
  logins = Login.all
67
- stats = DoubleAgent.percentages_for(logins, :browser_family, :os_family)
64
+ p logins.size
65
+ => 1000
68
66
 
67
+ stats = DoubleAgent::Stats.percentages_for(logins, :browser_family, :os_family)
69
68
  p stats
70
- => [["Firefox", "Windows", 50.0], ["Internet Explorer", "Windows", 20.0], ["Safari", "OS X", 20.0], ["Firefox", "GNU/Linux", 10.0]]
69
+ => [["Firefox", "Windows", 50.0, 500], ["Internet Explorer", "Windows", 20.0, 200], ["Safari", "OS X", 20.0, 200], ["Firefox", "GNU/Linux", 10.0, 100]]
71
70
 
72
- stats.each do |browser_family, os_family, percent|
73
- puts "#{browser_family} on #{os_family} - #{percent}%"
71
+ stats.each do |browser_family, os_family, percent, real_num|
72
+ puts "#{browser_family} on #{os_family} - #{percent}% (#{real_num} hits)"
74
73
  end
74
+ => "Firefox on Windows - 50% (500 hits)"
75
+ => "Internet Explorer on Windows - 20% (200 hits)"
76
+ => "Safari on OS X - 20% (200 hits)"
77
+ => "Firefox on GNU/Linux - 10% (100 hits)"
75
78
 
76
79
  = Logs
77
80
 
78
- DoubleAgent::log_entries parses through Apache and Nginx access logs, instantiating each log line into a DoubleAgent::LogEntry
79
- object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::LogEntry class mixes in DoubleAgent::Resource, you
80
- can easily calculate browser and/or OS market share on your site.
81
-
82
- require 'double_agent'
83
- require 'double_agent/logs'
81
+ DoubleAgent::Logs::entries parses through Apache and Nginx access logs, instantiating each log line into a DoubleAgent::Logs::Entry
82
+ object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::Logs::Entry class mixes in DoubleAgent::Resource, you
83
+ can easily calculate browser and/or OS share on your site.
84
84
 
85
- entries = DoubleAgent.log_entries("/var/log/nginx/my-site.access.log*")
86
- stats = DoubleAgent.percentages_for(entries, :browser)
85
+ entries = DoubleAgent::Logs.entries("/var/log/nginx/my-site.access.log*")
86
+ stats = DoubleAgent::Stats.percentages_for(entries, :browser)
87
87
 
88
88
  p stats
89
- => [["Firefox 4", 20.0], ["Internet Explorer 8", 18.0], ...]
89
+ => [["Firefox 4", 20.0, 650], ["Internet Explorer 8", 18.0, 587], ...]
90
+
91
+ Check out the DoubleAgent::Logs::Entry class for more methods besides user_agent.
@@ -1,2 +1,4 @@
1
- require 'double_agent'
1
+ require 'double_agent/parser'
2
+ require 'double_agent/resources'
3
+ require 'double_agent/stats'
2
4
  require 'double_agent/logs'
@@ -1,41 +1,102 @@
1
- require 'zlib'
1
+ require 'date'
2
2
 
3
3
  module DoubleAgent
4
- # Accepts a glob path like /var/logs/apache/my-site.access.log*,
5
- # parses all matching files into an array of LegEntry objects, and returns them.
6
- #
7
- # options[:match] and options[:ignore] can each take a regular expression,
8
- # ignoring lines that do and don't match, respectively.
9
- def self.log_entries(glob_str, options={})
10
- gz_regexp = /\.gz\Z/i
11
- entries = []
12
- parse = (options[:match] or options[:ignore]) \
13
- ? lambda { |line| entries << LogEntry.new(line) if (options[:match].nil? or line =~ options[:match]) and (options[:ignore].nil? or line !~ options[:ignore]) } \
14
- : lambda { |line| entries << LogEntry.new(line) }
15
- Dir.glob(glob_str).each do |f|
16
- File.open(f, 'r') do |file|
17
- handle = f =~ gz_regexp ? Zlib::GzipReader.new(file) : file
18
- handle.readlines.each &parse
4
+ #
5
+ # The Logs module contains methods and classes for parsing user agent strings
6
+ # from Apache-style logs (includes default Nginx log format). Gzipped logs
7
+ # are also supported.
8
+ #
9
+ module Logs
10
+ begin
11
+ require 'zlib'
12
+ ZLIB = true
13
+ rescue LoadError
14
+ $stderr.puts "Zlib not available for DoubleAgent::Logs; gzipped log files will be skipped."
15
+ ZLIB = false
16
+ end
17
+
18
+ # This class represents a line in an Apache or Nginx access log.
19
+ # The user agent string is parsed out and available through the
20
+ # user_agent attribute, making it available to the mixed-in DoubleAgent::Resource methods.
21
+ # Datestamps and Timestamps may also be retrieved for each instance, using the #on and #at methods, respectively.
22
+
23
+ class Entry
24
+ include DoubleAgent::Resource
25
+ # Returns the user agent string
26
+ attr_reader :user_agent, :line
27
+
28
+ # Regular expression for pulling a user agent string out of a log entry. It is rather imprecise
29
+ # only for efficiency's sake.
30
+ USER_AGENT_REGEXP = /" ".+$/
31
+
32
+ # Regexp for parsing an IP address
33
+ IP_REGEXP = /^[0-9a-z\.:]+/
34
+
35
+ # Regex for parsing the date out of the log line
36
+ DATESTAMP_REGEXP = %r{[0-9]+/[a-z]+/[0-9]+:}i
37
+ # strptime format string for turning a DATESTAMP_REGEXP match into a Date object
38
+ DATESTAMP_FORMAT = '%d/%B/%Y:'
39
+
40
+ # Regex for parsing the datetime out of the log line
41
+ TIMESTAMP_REGEXP = %r{[0-9]+/[a-z]+/[0-9]+:[0-9]+:[0-9]+:[0-9]+ (-|\+)[0-9]+}i
42
+ # strptime format string for turning a TIMESTAMP_REGEXP match into a DateTime object
43
+ TIMESTAMP_FORMAT = '%d/%B/%Y:%H:%M:%S %z'
44
+
45
+ # Initializes a new Entry object. An Apache or Nginx log line should be
46
+ # passed to it.
47
+ def initialize(line)
48
+ @line = line
49
+ @user_agent = line.slice(USER_AGENT_REGEXP)
50
+ end
51
+
52
+ # Returns the IP address the hit originated from
53
+ def ip
54
+ @line.slice(IP_REGEXP)
55
+ end
56
+
57
+ # Returns the Date the hit occurred on
58
+ def on
59
+ date_str = @line.slice(DATESTAMP_REGEXP)
60
+ date_str ? Date.strptime(date_str, DATESTAMP_FORMAT) : nil
61
+ end
62
+
63
+ # Returns the DateTime the hit occurred at
64
+ def at
65
+ datetime_str = @line.slice(TIMESTAMP_REGEXP)
66
+ datetime_str ? DateTime.strptime(datetime_str, TIMESTAMP_FORMAT) : nil
19
67
  end
20
68
  end
21
- entries
22
- end
23
69
 
24
- # This class represents a line in an Apache or Nginx access log.
25
- # The user agent string is parsed out and available through the
26
- # user_agent attribute, making it available to the mixed-in DoubleAgent::Resource.
27
-
28
- class LogEntry
29
- # Regular expression for pulling a user agent string out of a log entry
30
- USER_AGENT_REGEXP = /" ".+$/
31
- include DoubleAgent::Resource
32
- # Returns the user agent string
33
- attr_reader :user_agent
34
-
35
- # Initializes a new LogEntry object. An Apache or Nginx log line should be
36
- # passed to it.
37
- def initialize(line)
38
- @user_agent = line.slice(USER_AGENT_REGEXP)
70
+ # Accepts a glob path like /var/logs/apache/my-site.access.log*,
71
+ # parses all matching files into an array of Entry objects, and returns them.
72
+ # Gzipped log files are parsed by Zlib.
73
+ #
74
+ # Options:
75
+ #
76
+ # :match A regular expression. Only lines which match this will be returned.
77
+ # :ignore A regular expression. Any lines which match this will be ignored.
78
+ def self.entries(glob_str, options={})
79
+ match, ignore = options[:match], options[:ignore]
80
+ entries = []
81
+
82
+ # Define the parse lambda
83
+ parse = (match or ignore) \
84
+ ? lambda { |line| entries << Entry.new(line) unless (match and line !~ match) or (ignore and line =~ ignore) } \
85
+ : lambda { |line| entries << Entry.new(line) }
86
+
87
+ # Define the read lambda
88
+ read = lambda do |f|
89
+ zipped = f =~ /\.gz\Z/i
90
+ return unless ZLIB or not zipped
91
+ File.open(f, 'r') do |file|
92
+ handle = zipped ? Zlib::GzipReader.new(file) : file
93
+ #handle.each_line &parse # A little slower, but may be more memory efficient
94
+ handle.readlines.each &parse
95
+ end
96
+ end
97
+
98
+ Dir.glob(glob_str).each &read
99
+ entries
39
100
  end
40
101
  end
41
102
  end
@@ -34,7 +34,7 @@ module DoubleAgent
34
34
  end
35
35
  end
36
36
 
37
- # Returns the browser's name. If you provide an user agent string as an argument,
37
+ # Returns the browser's name. If you provide a user agent string as an argument,
38
38
  # it will attempt to also return the major version number. E.g. "Firefox 4".
39
39
  def browser(ua=nil)
40
40
  if ua and (@version or @safe_version)
@@ -48,7 +48,7 @@ module DoubleAgent
48
48
  # BrowserParser would return the Chromium BrowserParser. For browsers that are their
49
49
  # own family (e.g. Firefox, IE) it will end up returning itself.
50
50
  def family
51
- BROWSERS[family_sym]
51
+ BROWSERS[@family_sym]
52
52
  end
53
53
 
54
54
  private
@@ -56,7 +56,7 @@ module DoubleAgent
56
56
  # Attempts to parse and return the browser's version from a user agent string. Returns
57
57
  # nil if nothing is found.
58
58
  def version(ua)
59
- if @safe_version and RUBY_VERSION < MIN_VERSION
59
+ if @safe_version
60
60
  ua.slice(@safe_version[0]).slice(@safe_version[1])
61
61
  else
62
62
  ua.slice(@version)
@@ -83,7 +83,7 @@ module DoubleAgent
83
83
  # OSParser would return the GNU/Linux OSParser. For OSes that are their own
84
84
  # family (e.g. OS X) it will end up returning itself.
85
85
  def family
86
- OSES[family_sym]
86
+ OSES[@family_sym]
87
87
  end
88
88
  end
89
89
 
@@ -1,49 +1,65 @@
1
1
  module DoubleAgent
2
- # True if running under less than Ruby 1.9
3
- BAD_RUBY = RUBY_VERSION < '1.9.0'
4
-
5
- if BAD_RUBY
6
- require 'bigdecimal'
7
- # If BAD_RUBY, this is used in lieu of the native round method
8
- def self.better_round(f, n)
9
- d = BigDecimal.new f.to_s
10
- d.round(n).to_f
11
- end
12
- end
13
-
14
- # For the given "things", returns the share of the group that each attr has.
15
- #
16
- # "things" is an array of objects who's classes "include DoubleAgent::Resource".
17
- #
18
- # "args" is one or more method symbols from DoubleAgent::Resource.
19
2
  #
20
- # "args" may have, as it's last member, :threshold => n, where n is the lowest
21
- # percentage you want returned.
3
+ # The Stats module provides methods for determining browser and/or OS share
4
+ # for large numbers of DoubleAgent::Resource objects.
22
5
  #
23
- # Example, Browser Family share:
24
- # DoubleAgent.percentages_for(logins, :browser_family)
25
- # [['Firefox', 50.4], ['Chrome', 19.6], ['Internet Explorer', 15], ['Safari', 10], ['Unknown', 5]]
26
- #
27
- # Example, Browser/OS share, asking for symbols back:
28
- # DoubleAgent.percentages_for(server_log_entries, :browser_sym, :os_sym)
29
- # [[:firefox, :windows_7, 50.4], [:chrome, :osx, 19.6], [:msie, :windows_xp, 15], [:safari, :osx, 10], [:other, :other, 5]]
30
- def self.percentages_for(things, *args)
31
- options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
32
- p = {}
33
- things.each do |h|
34
- syms = args.map { |attr| h.send attr }
35
- p[syms] ||= 0
36
- p[syms] += 1
37
- end
38
- size = things.size.to_f
39
- p = p.to_a
6
+ module Stats
7
+ # True if running under less than Ruby 1.9
8
+ BAD_RUBY = RUBY_VERSION < '1.9.0'
9
+
40
10
  if BAD_RUBY
41
- p.collect! { |k,n| [*k.<<(better_round(((n * 100) / size), 2))] }
42
- else
43
- p.collect! { |k,n| [*k.<<(((n * 100) / size).round(2))] }
11
+ require 'bigdecimal'
12
+ # If BAD_RUBY, this is used in lieu of the native round method
13
+ def self.better_round(f, n)
14
+ d = BigDecimal.new f.to_s
15
+ d.round(n).to_f
16
+ end
17
+ end
18
+
19
+ # For the given "things", returns the share of the group that each attr has.
20
+ #
21
+ # "things" is an array of objects whose classes mix in DoubleAgent::Resource.
22
+ #
23
+ # "args" is one or more method symbols from DoubleAgent::Resource.
24
+ #
25
+ # "args" may have, as its last member, :threshold => n, where n is the number of the lowest
26
+ # percentage you want returned.
27
+ #
28
+ # Returns an array of [attribute(s), percent of total, number of total]
29
+ #
30
+ # Example, Browser Family share:
31
+ #
32
+ # DoubleAgent::Stats.percentages_for(logins, :browser_family)
33
+ # [['Firefox', 50.4, 5040], ['Chrome', 19.6, 1960], ['Internet Explorer', 15, 1500], ['Safari', 10, 1000], ['Unknown', 5, 500]]
34
+ #
35
+ # Example, Browser/OS share, asking for symbols back:
36
+ #
37
+ # DoubleAgent::Stats.percentages_for(server_log_entries, :browser_sym, :os_sym)
38
+ # [[:firefox, :windows_7, 50.4, 5040], [:chrome, :osx, 19.6, 1960], [:msie, :windows_xp, 15, 1500], [:safari, :osx, 10, 1000], [:other, :other, 5, 500]]
39
+ def self.percentages_for(things, *args)
40
+ options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
41
+ results = {}
42
+ # Count each instance
43
+ things.each do |h|
44
+ syms = args.map { |attr| h.send attr }
45
+ results[syms] ||= 0
46
+ results[syms] += 1
47
+ end
48
+ size = things.size.to_f
49
+ results = results.to_a
50
+ # From the total, calculate the percentage held by each browser, OS, etc.
51
+ if BAD_RUBY
52
+ results.collect! { |k,n| [*k.<<(better_round(((n * 100) / size), 2)).<<(n)] }
53
+ else
54
+ # Ruby 1.9 syntax that blows up in Ruby 1.8
55
+ #results.collect! { |k,n| [*k, ((n * 100) / size).round(2), n] }
56
+ results.collect! { |k,n| [*k.<<(((n * 100) / size).round(2)).<<(n)] }
57
+ end
58
+ # Sort in descending order by hit count (most common first)
59
+ results.sort! { |a,b| b.last <=> a.last }
60
+ # Reject percentages below a specified threshold
61
+ results.reject! { |a| a[-2] < options[:threshold] } if options[:threshold]
62
+ results
44
63
  end
45
- p.sort! { |a,b| b.last <=> a.last }
46
- p.reject! { |a| a.last < options[:threshold] } if options[:threshold]
47
- p
48
64
  end
49
65
  end
data/lib/double_agent.rb CHANGED
@@ -1,3 +1,2 @@
1
- require 'double_agent/core'
1
+ require 'double_agent/parser'
2
2
  require 'double_agent/resources'
3
- require 'double_agent/stats'
data/spec/data_spec.rb ADDED
@@ -0,0 +1,100 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ DA = DoubleAgent
4
+
5
+ describe DoubleAgent do
6
+ # Internet Explorer
7
+ it 'should be Internet Explorer 10 on Windows 8' do
8
+ ua = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/5.0"
9
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 10 on Windows 8'
10
+ end
11
+
12
+ it 'should be Internet Explorer 9 on Windows 7' do
13
+ ua = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0"
14
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 9 on Windows 7'
15
+ end
16
+
17
+ it 'should be Internet Explorer 8 on Windows Vista' do
18
+ ua = "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Win64; x64; Trident/5.0"
19
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 8 on Windows Vista'
20
+ end
21
+
22
+ it 'should be Internet Explorer 7 on Windows XP' do
23
+ ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.2; Win64; x64; Trident/5.0"
24
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
25
+ end
26
+
27
+ it 'should be Internet Explorer 7 on Windows XP' do
28
+ ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.1; Win64; x64; Trident/5.0"
29
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
30
+ end
31
+
32
+ # Chrome
33
+ it 'should be Chrome 12 on Windows XP' do
34
+ ua = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.25 (KHTML, like Gecko) Chrome/12.0.706.0 Safari/534.25"
35
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Windows XP'
36
+ end
37
+
38
+ # Chromium
39
+ it 'should be Chrome 12 on Ubuntu' do
40
+ ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.703.0 Chrome/12.0.703.0 Safari/534.24"
41
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Ubuntu'
42
+ end
43
+
44
+ # Android
45
+ it 'should be Android 2.3 on Android' do
46
+ ua = "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-tw; HTC_Pyramid Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"
47
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Android 2.3 on Android'
48
+ end
49
+
50
+ # Safari
51
+ it 'should be Safari 5 on OS X' do
52
+ ua = "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27"
53
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Safari 5 on OS X'
54
+ end
55
+
56
+ # Opera
57
+ it 'should be Opera 11 on GNU/Linux' do
58
+ ua = "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00"
59
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on GNU/Linux'
60
+ end
61
+
62
+ it 'should be Opera 11 on Windows 7' do
63
+ ua = "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01"
64
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on Windows 7'
65
+ end
66
+
67
+ # Firefox
68
+ it 'should be Firefox 4 on GNU/Linux' do
69
+ ua = "Mozilla/5.0 (X11; U; Linux x86_64; pl-PL; rv:2.0) Gecko/20110307 Firefox/4.0"
70
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Firefox 4 on GNU/Linux'
71
+ end
72
+
73
+ # Epiphany
74
+ it 'should be Epiphany on GNU/Linux' do
75
+ ua = "Mozilla/5.0 (X11; U; Linux x86_64; fr-FR) AppleWebKit/534.7 (KHTML, like Gecko) Epiphany/2.30.6 Safari/534.7"
76
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Epiphany on GNU/Linux'
77
+ end
78
+
79
+ # Konqueror
80
+ it 'should be Konqueror on FreeBSD' do
81
+ ua = "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)"
82
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on FreeBSD'
83
+ end
84
+
85
+ it 'should be Konqueror on Fedora' do
86
+ ua = "Mozilla/5.0 (compatible; Konqueror/4.4; Linux) KHTML/4.4.1 (like Gecko) Fedora/4.4.1-1.fc12"
87
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Fedora'
88
+ end
89
+
90
+ it 'should be Konqueror on Slackware' do
91
+ ua = "Mozilla/5.0 (compatible; Konqueror/4.2; Linux) KHTML/4.2.4 (like Gecko) Slackware/13.0"
92
+ "#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Slackware'
93
+ end
94
+
95
+ # BlackBerry
96
+ it 'should be BlackBerry on BlackBerry' do
97
+ ua = "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; zh-TW) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.448 Mobile Safari/534.8+"
98
+ "#{DA.browser ua} on #{DA.os ua}".should == 'BlackBerry on BlackBerry'
99
+ end
100
+ end
data/spec/parser_spec.rb CHANGED
@@ -1,100 +1,114 @@
1
1
  require File.dirname(__FILE__) + '/spec_helper'
2
2
 
3
- DA = DoubleAgent
4
-
5
3
  describe DoubleAgent do
6
- # Internet Explorer
7
- it 'should be Internet Explorer 10 on Windows 8' do
8
- ua = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/5.0"
9
- "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 10 on Windows 8'
10
- end
11
-
12
- it 'should be Internet Explorer 9 on Windows 7' do
13
- ua = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0"
14
- "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 9 on Windows 7'
15
- end
16
-
17
- it 'should be Internet Explorer 8 on Windows Vista' do
18
- ua = "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Win64; x64; Trident/5.0"
19
- "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 8 on Windows Vista'
20
- end
21
-
22
- it 'should be Internet Explorer 7 on Windows XP' do
23
- ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.2; Win64; x64; Trident/5.0"
24
- "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
25
- end
26
-
27
- it 'should be Internet Explorer 7 on Windows XP' do
28
- ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.1; Win64; x64; Trident/5.0"
29
- "#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
30
- end
31
-
32
- # Chrome
33
- it 'should be Chrome 12 on Windows XP' do
34
- ua = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.25 (KHTML, like Gecko) Chrome/12.0.706.0 Safari/534.25"
35
- "#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Windows XP'
36
- end
37
-
38
- # Chromium
39
- it 'should be Chrome 12 on Ubuntu' do
40
- ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.703.0 Chrome/12.0.703.0 Safari/534.24"
41
- "#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Ubuntu'
42
- end
43
-
44
- # Android
45
- it 'should be Android 2.3 on Android' do
46
- ua = "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-tw; HTC_Pyramid Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"
47
- "#{DA.browser ua} on #{DA.os ua}".should == 'Android 2.3 on Android'
48
- end
49
-
50
- # Safari
51
- it 'should be Safari 5 on OS X' do
52
- ua = "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27"
53
- "#{DA.browser ua} on #{DA.os ua}".should == 'Safari 5 on OS X'
54
- end
55
-
56
- # Opera
57
- it 'should be Opera 11 on GNU/Linux' do
58
- ua = "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00"
59
- "#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on GNU/Linux'
60
- end
61
-
62
- it 'should be Opera 11 on Windows 7' do
63
- ua = "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01"
64
- "#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on Windows 7'
65
- end
66
-
67
- # Firefox
68
- it 'should be Firefox 4 on GNU/Linux' do
69
- ua = "Mozilla/5.0 (X11; U; Linux x86_64; pl-PL; rv:2.0) Gecko/20110307 Firefox/4.0"
70
- "#{DA.browser ua} on #{DA.os ua}".should == 'Firefox 4 on GNU/Linux'
71
- end
72
-
73
- # Epiphany
74
- it 'should be Epiphany on GNU/Linux' do
75
- ua = "Mozilla/5.0 (X11; U; Linux x86_64; fr-FR) AppleWebKit/534.7 (KHTML, like Gecko) Epiphany/2.30.6 Safari/534.7"
76
- "#{DA.browser ua} on #{DA.os ua}".should == 'Epiphany on GNU/Linux'
77
- end
78
-
79
- # Konqueror
80
- it 'should be Konqueror on FreeBSD' do
81
- ua = "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)"
82
- "#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on FreeBSD'
83
- end
84
-
85
- it 'should be Konqueror on Fedora' do
86
- ua = "Mozilla/5.0 (compatible; Konqueror/4.4; Linux) KHTML/4.4.1 (like Gecko) Fedora/4.4.1-1.fc12"
87
- "#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Fedora'
88
- end
89
-
90
- it 'should be Konqueror on Slackware' do
91
- ua = "Mozilla/5.0 (compatible; Konqueror/4.2; Linux) KHTML/4.2.4 (like Gecko) Slackware/13.0"
92
- "#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Slackware'
93
- end
94
-
95
- # BlackBerry
96
- it 'should be BlackBerry on BlackBerry' do
97
- ua = "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; zh-TW) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.448 Mobile Safari/534.8+"
98
- "#{DA.browser ua} on #{DA.os ua}".should == 'BlackBerry on BlackBerry'
4
+ context 'Parser' do
5
+ before do
6
+ @ua_string = 'Mozilla/5.0 (X11; Ubuntu Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0'
7
+ end
8
+
9
+ #browser
10
+ it 'returns Firefox 4 for browser' do
11
+ DoubleAgent.browser(@ua_string).should == 'Firefox 4'
12
+ end
13
+ it 'returns Unknown for browser' do
14
+ DoubleAgent.browser('froofroo').should == 'Unknown'
15
+ end
16
+
17
+ #browser_sym
18
+ it 'returns :firefox for browser_sym' do
19
+ DoubleAgent.browser_sym(@ua_string).should == :firefox
20
+ end
21
+ it 'returns :unknown for browser_sym' do
22
+ DoubleAgent.browser_sym('froofroo').should == :unknown
23
+ end
24
+ it 'returns :unknown for an empty browser_sym' do
25
+ DoubleAgent.browser_sym('').should == :unknown
26
+ end
27
+ it 'returns :unknown for a nil browser_sym' do
28
+ DoubleAgent.browser_sym(nil).should == :unknown
29
+ end
30
+
31
+ #browser_family
32
+ it 'returns Firefox for browser family' do
33
+ DoubleAgent.browser_family(@ua_string).should == 'Firefox'
34
+ end
35
+
36
+ #browser_family_sym
37
+ it 'returns :firefox for browser_family_sym' do
38
+ DoubleAgent.browser_family_sym(@ua_string).should == :firefox
39
+ end
40
+ it 'returns :unknown for an empty browser_family_sym' do
41
+ DoubleAgent.browser_family_sym('').should == :unknown
42
+ end
43
+ it 'returns :unknown for a nil browser_family_sym' do
44
+ DoubleAgent.browser_family_sym(nil).should == :unknown
45
+ end
46
+
47
+ #browser_icon
48
+ it 'returns :firefox for browser_sym' do
49
+ DoubleAgent.browser_icon(@ua_string).should == :firefox
50
+ end
51
+ it 'returns :unkown for an empty browser_sym' do
52
+ DoubleAgent.browser_icon('').should == :unknown
53
+ end
54
+ it 'returns :unkown for a nil browser_sym' do
55
+ DoubleAgent.browser_icon(nil).should == :unknown
56
+ end
57
+
58
+ #browser_family_icon
59
+ it 'returns :firefox for browser_family_sym' do
60
+ DoubleAgent.browser_family_icon(@ua_string).should == :firefox
61
+ end
62
+ it 'returns :unkown for an empty browser_family_sym' do
63
+ DoubleAgent.browser_family_icon('').should == :unknown
64
+ end
65
+ it 'returns :unkown for a nil browser_family_sym' do
66
+ DoubleAgent.browser_family_icon(nil).should == :unknown
67
+ end
68
+
69
+ #os
70
+ it 'returns Ubuntua for OS' do
71
+ DoubleAgent.os(@ua_string).should == 'Ubuntu'
72
+ end
73
+ it 'returns Unknowna for OS' do
74
+ DoubleAgent.os('froofroo').should == 'Unknown'
75
+ end
76
+ it 'returns Unknowna for OS' do
77
+ DoubleAgent.os('').should == 'Unknown'
78
+ end
79
+
80
+ #os_sym
81
+ it 'returns :ubuntu for os_sym' do
82
+ DoubleAgent.os_sym(@ua_string).should == :ubuntu
83
+ end
84
+ it 'returns :unknown for os_sym' do
85
+ DoubleAgent.os_sym('froofroo').should == :unknown
86
+ end
87
+ it 'returns :unknown for an empty os_sym' do
88
+ DoubleAgent.os_sym('').should == :unknown
89
+ end
90
+ it 'returns :unknown for a nil os_sym' do
91
+ DoubleAgent.os_sym(nil).should == :unknown
92
+ end
93
+
94
+ #os_family
95
+ it 'returns GNU/Linux OS family' do
96
+ DoubleAgent.os_family(@ua_string).should == 'GNU/Linux'
97
+ end
98
+
99
+ #os_family_sym
100
+ it 'returns :linux for os_family_sym' do
101
+ DoubleAgent.os_family_sym(@ua_string).should == :linux
102
+ end
103
+
104
+ #os_icon
105
+ it 'returns :ubuntu for os_sym' do
106
+ DoubleAgent.os_icon(@ua_string).should == :ubuntu
107
+ end
108
+
109
+ #os_family_icon
110
+ it 'returns :linux for os_family_sym' do
111
+ DoubleAgent.os_family_icon(@ua_string).should == :linux
112
+ end
99
113
  end
100
114
  end
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'rspec'
2
- require File.dirname(__FILE__) + '/../lib/double_agent/core'
2
+ require File.dirname(__FILE__) + '/../lib/double_agent/parser'
3
3
  require File.dirname(__FILE__) + '/../lib/double_agent/resources'
4
4
  require File.dirname(__FILE__) + '/../lib/double_agent/stats'
5
5
  require File.dirname(__FILE__) + '/../lib/double_agent/logs'
@@ -7,3 +7,13 @@ require File.dirname(__FILE__) + '/../lib/double_agent/logs'
7
7
  Rspec.configure do |c|
8
8
  c.mock_with :rspec
9
9
  end
10
+
11
+ module Kernel
12
+ def suppress_warnings
13
+ original_verbosity = $VERBOSE
14
+ $VERBOSE = nil
15
+ result = yield
16
+ $VERBOSE = original_verbosity
17
+ return result
18
+ end
19
+ end
data/spec/stats_spec.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require File.dirname(__FILE__) + '/spec_helper'
2
2
 
3
3
  log_glob = File.dirname(__FILE__) + '/data/*.access.log*'
4
- entries = DoubleAgent::log_entries(log_glob, :match => /^\d/)
4
+ entries = DoubleAgent::Logs::entries(log_glob, :match => /^\d/)
5
5
 
6
6
  describe DoubleAgent do
7
7
  context 'Logs' do
@@ -10,27 +10,54 @@ describe DoubleAgent do
10
10
  end
11
11
 
12
12
  it 'should have loaded n log entries' do
13
- DoubleAgent::log_entries(log_glob, :match => /^\d/, :ignore => %r{ /dashboard }).size.should == 44
13
+ DoubleAgent::Logs::entries(log_glob, :match => /^\d/, :ignore => %r{ /dashboard }).size.should == 44
14
+ end
15
+
16
+ context 'without zlib' do
17
+ it 'should have loaded n log entries' do
18
+ suppress_warnings { DoubleAgent::Logs::ZLIB = false }
19
+ plain_entries = DoubleAgent::Logs::entries(log_glob, :match => /^\d/)
20
+ suppress_warnings { DoubleAgent::Logs::ZLIB = true }
21
+ plain_entries.size.should == 17
22
+ end
23
+ end
24
+
25
+ context 'parsing other data' do
26
+ before :each do
27
+ @line = DoubleAgent::Logs::Entry.new '68.52.99.211 - - [04/May/2011:08:21:04 -0400] "GET / HTTP/1.1" 200 1312 "-" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.17) Gecko/20110420 Firefox/3.6.17"'
28
+ end
29
+
30
+ it 'should parse the IP' do
31
+ @line.ip.should == '68.52.99.211'
32
+ end
33
+
34
+ it 'should parse datestamps' do
35
+ @line.on.should == Date.new(2011, 5, 4)
36
+ end
37
+
38
+ it 'should parse timestamps' do
39
+ @line.at.should == DateTime.new(2011, 5, 4, 8, 21, 4, '-0400')
40
+ end
14
41
  end
15
42
  end
16
43
 
17
44
  context 'Stats' do
18
45
  it 'should calculate stats' do
19
- stats = DoubleAgent.percentages_for entries, :browser_family, :os_family
20
- answer = [["Internet Explorer", "Windows", 42.55],
21
- ["Chromium", "GNU/Linux", 40.43],
22
- ["Firefox", "GNU/Linux", 10.64],
23
- ["Firefox", "OS X", 4.26],
24
- ["Safari", "OS X", 2.13]]
46
+ stats = DoubleAgent::Stats.percentages_for entries, :browser_family, :os_family
47
+ answer = [["Internet Explorer", "Windows", 42.55, 20],
48
+ ["Chromium", "GNU/Linux", 40.43, 19],
49
+ ["Firefox", "GNU/Linux", 10.64, 5],
50
+ ["Firefox", "OS X", 4.26, 2],
51
+ ["Safari", "OS X", 2.13, 1]]
25
52
  stats.should == answer
26
53
  end
27
54
 
28
55
  it 'should ignore stats below the threshold' do
29
- stats = DoubleAgent.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
30
- answer = [["Internet Explorer", "Windows", 42.55],
31
- ["Chromium", "GNU/Linux", 40.43],
32
- ["Firefox", "GNU/Linux", 10.64],
33
- ["Firefox", "OS X", 4.26]]
56
+ stats = DoubleAgent::Stats.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
57
+ answer = [["Internet Explorer", "Windows", 42.55, 20],
58
+ ["Chromium", "GNU/Linux", 40.43, 19],
59
+ ["Firefox", "GNU/Linux", 10.64, 5],
60
+ ["Firefox", "OS X", 4.26, 2]]
34
61
  stats.should == answer
35
62
  end
36
63
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 1
8
7
  - 2
9
- version: 0.1.2
8
+ - 0
9
+ version: 0.2.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jordan Hollinger
@@ -14,11 +14,11 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-06-30 00:00:00 -04:00
17
+ date: 2011-07-30 00:00:00 -04:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
21
- description: Browser User Agent string parser with resource, stats, and a log reader
21
+ description: Browser User Agent string parser with resources, stats, and a log reader
22
22
  email: jordan@jordanhollinger.com
23
23
  executables: []
24
24
 
@@ -31,17 +31,17 @@ files:
31
31
  - lib/double_agent.rb
32
32
  - lib/double_agent/all.rb
33
33
  - lib/double_agent/logs.rb
34
- - lib/double_agent/core.rb
35
34
  - lib/double_agent/resources.rb
36
35
  - lib/double_agent/stats.rb
36
+ - lib/double_agent/parser.rb
37
37
  - data/browsers.yml
38
38
  - data/oses.yml
39
39
  - spec/spec_helper.rb
40
- - spec/core_spec.rb
41
40
  - spec/data/httpd.access.log.1.gz
42
41
  - spec/data/httpd.access.log
43
42
  - spec/stats_spec.rb
44
43
  - spec/resources_spec.rb
44
+ - spec/data_spec.rb
45
45
  - spec/parser_spec.rb
46
46
  - README.rdoc
47
47
  - LICENSE
data/spec/core_spec.rb DELETED
@@ -1,114 +0,0 @@
1
- require File.dirname(__FILE__) + '/spec_helper'
2
-
3
- describe DoubleAgent do
4
- context 'Core' do
5
- before do
6
- @ua_string = 'Mozilla/5.0 (X11; Ubuntu Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0'
7
- end
8
-
9
- #browser
10
- it 'returns Firefox 4 for browser' do
11
- DoubleAgent.browser(@ua_string).should == 'Firefox 4'
12
- end
13
- it 'returns Unknown for browser' do
14
- DoubleAgent.browser('froofroo').should == 'Unknown'
15
- end
16
-
17
- #browser_sym
18
- it 'returns :firefox for browser_sym' do
19
- DoubleAgent.browser_sym(@ua_string).should == :firefox
20
- end
21
- it 'returns :unknown for browser_sym' do
22
- DoubleAgent.browser_sym('froofroo').should == :unknown
23
- end
24
- it 'returns :unknown for an empty browser_sym' do
25
- DoubleAgent.browser_sym('').should == :unknown
26
- end
27
- it 'returns :unknown for a nil browser_sym' do
28
- DoubleAgent.browser_sym(nil).should == :unknown
29
- end
30
-
31
- #browser_family
32
- it 'returns Firefox for browser family' do
33
- DoubleAgent.browser_family(@ua_string).should == 'Firefox'
34
- end
35
-
36
- #browser_family_sym
37
- it 'returns :firefox for browser_family_sym' do
38
- DoubleAgent.browser_family_sym(@ua_string).should == :firefox
39
- end
40
- it 'returns :unknown for an empty browser_family_sym' do
41
- DoubleAgent.browser_family_sym('').should == :unknown
42
- end
43
- it 'returns :unknown for a nil browser_family_sym' do
44
- DoubleAgent.browser_family_sym(nil).should == :unknown
45
- end
46
-
47
- #browser_icon
48
- it 'returns :firefox for browser_sym' do
49
- DoubleAgent.browser_icon(@ua_string).should == :firefox
50
- end
51
- it 'returns :unkown for an empty browser_sym' do
52
- DoubleAgent.browser_icon('').should == :unknown
53
- end
54
- it 'returns :unkown for a nil browser_sym' do
55
- DoubleAgent.browser_icon(nil).should == :unknown
56
- end
57
-
58
- #browser_family_icon
59
- it 'returns :firefox for browser_family_sym' do
60
- DoubleAgent.browser_family_icon(@ua_string).should == :firefox
61
- end
62
- it 'returns :unkown for an empty browser_family_sym' do
63
- DoubleAgent.browser_family_icon('').should == :unknown
64
- end
65
- it 'returns :unkown for a nil browser_family_sym' do
66
- DoubleAgent.browser_family_icon(nil).should == :unknown
67
- end
68
-
69
- #os
70
- it 'returns Ubuntua for OS' do
71
- DoubleAgent.os(@ua_string).should == 'Ubuntu'
72
- end
73
- it 'returns Unknowna for OS' do
74
- DoubleAgent.os('froofroo').should == 'Unknown'
75
- end
76
- it 'returns Unknowna for OS' do
77
- DoubleAgent.os('').should == 'Unknown'
78
- end
79
-
80
- #os_sym
81
- it 'returns :ubuntu for os_sym' do
82
- DoubleAgent.os_sym(@ua_string).should == :ubuntu
83
- end
84
- it 'returns :unknown for os_sym' do
85
- DoubleAgent.os_sym('froofroo').should == :unknown
86
- end
87
- it 'returns :unknown for an empty os_sym' do
88
- DoubleAgent.os_sym('').should == :unknown
89
- end
90
- it 'returns :unknown for a nil os_sym' do
91
- DoubleAgent.os_sym(nil).should == :unknown
92
- end
93
-
94
- #os_family
95
- it 'returns GNU/Linux OS family' do
96
- DoubleAgent.os_family(@ua_string).should == 'GNU/Linux'
97
- end
98
-
99
- #os_family_sym
100
- it 'returns :linux for os_family_sym' do
101
- DoubleAgent.os_family_sym(@ua_string).should == :linux
102
- end
103
-
104
- #os_icon
105
- it 'returns :ubuntu for os_sym' do
106
- DoubleAgent.os_icon(@ua_string).should == :ubuntu
107
- end
108
-
109
- #os_family_icon
110
- it 'returns :linux for os_family_sym' do
111
- DoubleAgent.os_family_icon(@ua_string).should == :linux
112
- end
113
- end
114
- end