double_agent 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -0
- data/README.rdoc +26 -24
- data/lib/double_agent/all.rb +3 -1
- data/lib/double_agent/logs.rb +94 -33
- data/lib/double_agent/{core.rb → parser.rb} +4 -4
- data/lib/double_agent/stats.rb +58 -42
- data/lib/double_agent.rb +1 -2
- data/spec/data_spec.rb +100 -0
- data/spec/parser_spec.rb +109 -95
- data/spec/spec_helper.rb +11 -1
- data/spec/stats_spec.rb +40 -13
- metadata +6 -6
- data/spec/core_spec.rb +0 -114
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
== Release 0.2.0 (July 30, 2011)
|
2
|
+
|
3
|
+
* Separate code into real modules
|
4
|
+
|
5
|
+
* Only load parser and resources by default
|
6
|
+
|
7
|
+
* Add subtotals to percentages_for
|
8
|
+
|
9
|
+
* Add more data parsing methods to DoubleAgent::Logs::Entry objects
|
10
|
+
|
11
|
+
* Various optimizations and stuff I forgot about
|
12
|
+
|
1
13
|
== Release 0.1.2 (June 30, 2011)
|
2
14
|
|
3
15
|
* Bugfix to #browser_sym and #os_sym returning nil on an empty or nil user agent. Should return :unknown.
|
data/README.rdoc
CHANGED
@@ -9,20 +9,17 @@ agent strings. It is designed for parsing large sets for review or analysis.
|
|
9
9
|
|
10
10
|
== Loading
|
11
11
|
|
12
|
-
Double Agent is broken up into
|
12
|
+
Double Agent is broken up into modules - Parser, Resources, Stats and Logs.
|
13
13
|
|
14
|
-
# Load the
|
14
|
+
# Load the core modules (Parser, Resources)
|
15
15
|
require 'double_agent'
|
16
16
|
|
17
|
-
# Load
|
18
|
-
require 'double_agent/
|
17
|
+
# Load modules individually
|
18
|
+
require 'double_agent/parser|resources|stats|logs|all'
|
19
19
|
|
20
|
-
|
21
|
-
require 'double_agent/all'
|
20
|
+
= Parser
|
22
21
|
|
23
|
-
=
|
24
|
-
|
25
|
-
ua_string = "pretent I'm a user agent string for Chrome on Ubuntu"
|
22
|
+
ua_string = "I'm a user agent string for Chrome 11 on Ubuntu"
|
26
23
|
|
27
24
|
DoubleAgent.browser(ua_string)
|
28
25
|
=> "Chrome 11"
|
@@ -41,7 +38,7 @@ See the DoubleAgent module for more.
|
|
41
38
|
= Resources
|
42
39
|
|
43
40
|
DoubleAgent::Resource is a mix-in for objects with a user_agent method or attribute.
|
44
|
-
It gives that class's objects all of the above methods.
|
41
|
+
It gives that class's objects all of the above methods and more.
|
45
42
|
|
46
43
|
class Login
|
47
44
|
include DoubleAgent::Resource
|
@@ -61,29 +58,34 @@ It gives that class's objects all of the above methods.
|
|
61
58
|
|
62
59
|
= Stats
|
63
60
|
|
64
|
-
Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::percentages_for.
|
61
|
+
Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::Stats::percentages_for.
|
65
62
|
|
66
63
|
logins = Login.all
|
67
|
-
|
64
|
+
p logins.size
|
65
|
+
=> 1000
|
68
66
|
|
67
|
+
stats = DoubleAgent::Stats.percentages_for(logins, :browser_family, :os_family)
|
69
68
|
p stats
|
70
|
-
=> [["Firefox", "Windows", 50.0], ["Internet Explorer", "Windows", 20.0], ["Safari", "OS X", 20.0], ["Firefox", "GNU/Linux", 10.0]]
|
69
|
+
=> [["Firefox", "Windows", 50.0, 500], ["Internet Explorer", "Windows", 20.0, 200], ["Safari", "OS X", 20.0, 200], ["Firefox", "GNU/Linux", 10.0, 100]]
|
71
70
|
|
72
|
-
stats.each do |browser_family, os_family, percent|
|
73
|
-
puts "#{browser_family} on #{os_family} - #{percent}%"
|
71
|
+
stats.each do |browser_family, os_family, percent, real_num|
|
72
|
+
puts "#{browser_family} on #{os_family} - #{percent}% (#{real_num} hits)"
|
74
73
|
end
|
74
|
+
=> "Firefox on Windows - 50% (500 hits)"
|
75
|
+
=> "Internet Explorer on Windows - 20% (200 hits)"
|
76
|
+
=> "Safari on OS X - 20% (200 hits)"
|
77
|
+
=> "Firefox on GNU/Linux - 10% (100 hits)"
|
75
78
|
|
76
79
|
= Logs
|
77
80
|
|
78
|
-
DoubleAgent::
|
79
|
-
object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::
|
80
|
-
can easily calculate browser and/or OS
|
81
|
-
|
82
|
-
require 'double_agent'
|
83
|
-
require 'double_agent/logs'
|
81
|
+
DoubleAgent::Logs::entries parses through Apache and Nginx access logs, instantiating each log line into a DoubleAgent::Logs::Entry
|
82
|
+
object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::Logs::Entry class mixes in DoubleAgent::Resource, you
|
83
|
+
can easily calculate browser and/or OS share on your site.
|
84
84
|
|
85
|
-
entries = DoubleAgent.
|
86
|
-
stats = DoubleAgent.percentages_for(entries, :browser)
|
85
|
+
entries = DoubleAgent::Logs.entries("/var/log/nginx/my-site.access.log*")
|
86
|
+
stats = DoubleAgent::Stats.percentages_for(entries, :browser)
|
87
87
|
|
88
88
|
p stats
|
89
|
-
=> [["Firefox 4", 20.0], ["Internet Explorer 8", 18.0], ...]
|
89
|
+
=> [["Firefox 4", 20.0, 650], ["Internet Explorer 8", 18.0, 587], ...]
|
90
|
+
|
91
|
+
Check out the DoubleAgent::Logs::Entry class for more methods besides user_agent.
|
data/lib/double_agent/all.rb
CHANGED
data/lib/double_agent/logs.rb
CHANGED
@@ -1,41 +1,102 @@
|
|
1
|
-
require '
|
1
|
+
require 'date'
|
2
2
|
|
3
3
|
module DoubleAgent
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
4
|
+
#
|
5
|
+
# The Logs module contains methods and classes for parsing user agent strings
|
6
|
+
# from Apache-style logs (includes default Nginx log format). Gzipped logs
|
7
|
+
# are also supported.
|
8
|
+
#
|
9
|
+
module Logs
|
10
|
+
begin
|
11
|
+
require 'zlib'
|
12
|
+
ZLIB = true
|
13
|
+
rescue LoadError
|
14
|
+
$stderr.puts "Zlib not available for DoubleAgent::Logs; gzipped log files will be skipped."
|
15
|
+
ZLIB = false
|
16
|
+
end
|
17
|
+
|
18
|
+
# This class represents a line in an Apache or Nginx access log.
|
19
|
+
# The user agent string is parsed out and available through the
|
20
|
+
# user_agent attribute, making it available to the mixed-in DoubleAgent::Resource methods.
|
21
|
+
# Datestamps and Timestamps may also be retrieved for each instance, using the #on and #at methods, respectively.
|
22
|
+
|
23
|
+
class Entry
|
24
|
+
include DoubleAgent::Resource
|
25
|
+
# Readers for the parsed user agent string and the raw log line
|
26
|
+
attr_reader :user_agent, :line
|
27
|
+
|
28
|
+
# Regular expression for pulling a user agent string out of a log entry. It is rather imprecise
|
29
|
+
# only for efficiency's sake.
|
30
|
+
USER_AGENT_REGEXP = /" ".+$/
|
31
|
+
|
32
|
+
# Regexp for parsing an IP address
|
33
|
+
IP_REGEXP = /^[0-9a-z\.:]+/
|
34
|
+
|
35
|
+
# Regex for parsing the date out of the log line
|
36
|
+
DATESTAMP_REGEXP = %r{[0-9]+/[a-z]+/[0-9]+:}i
|
37
|
+
# strptime format for parsing the DATESTAMP_REGEXP match into a Date object
|
38
|
+
DATESTAMP_FORMAT = '%d/%B/%Y:'
|
39
|
+
|
40
|
+
# Regex for parsing the datetime out of the log line
|
41
|
+
TIMESTAMP_REGEXP = %r{[0-9]+/[a-z]+/[0-9]+:[0-9]+:[0-9]+:[0-9]+ (-|\+)[0-9]+}i
|
42
|
+
# strptime format for parsing the TIMESTAMP_REGEXP match into a DateTime object
|
43
|
+
TIMESTAMP_FORMAT = '%d/%B/%Y:%H:%M:%S %z'
|
44
|
+
|
45
|
+
# Initializes a new Entry object. An Apache or Nginx log line should be
|
46
|
+
# passed to it.
|
47
|
+
def initialize(line)
|
48
|
+
@line = line
|
49
|
+
@user_agent = line.slice(USER_AGENT_REGEXP)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns the IP address the hit originated from
|
53
|
+
def ip
|
54
|
+
@line.slice(IP_REGEXP)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Returns the Date the hit occurred on
|
58
|
+
def on
|
59
|
+
date_str = @line.slice(DATESTAMP_REGEXP)
|
60
|
+
date_str ? Date.strptime(date_str, DATESTAMP_FORMAT) : nil
|
61
|
+
end
|
62
|
+
|
63
|
+
# Returns the DateTime the hit occurred at
|
64
|
+
def at
|
65
|
+
datetime_str = @line.slice(TIMESTAMP_REGEXP)
|
66
|
+
datetime_str ? DateTime.strptime(datetime_str, TIMESTAMP_FORMAT) : nil
|
19
67
|
end
|
20
68
|
end
|
21
|
-
entries
|
22
|
-
end
|
23
69
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
70
|
+
# Accepts a glob path like /var/logs/apache/my-site.access.log*,
|
71
|
+
# parses all matching files into an array of Entry objects, and returns them.
|
72
|
+
# Gzipped log files are parsed by Zlib.
|
73
|
+
#
|
74
|
+
# Options:
|
75
|
+
#
|
76
|
+
# :match A regular expression. Only lines which match this will be returned.
|
77
|
+
# :ignore A regular expression. Any lines which match this will be ignored.
|
78
|
+
def self.entries(glob_str, options={})
|
79
|
+
match, ignore = options[:match], options[:ignore]
|
80
|
+
entries = []
|
81
|
+
|
82
|
+
# Define the parse lambda
|
83
|
+
parse = (match or ignore) \
|
84
|
+
? lambda { |line| entries << Entry.new(line) unless (match and line !~ match) or (ignore and line =~ ignore) } \
|
85
|
+
: lambda { |line| entries << Entry.new(line) }
|
86
|
+
|
87
|
+
# Define the read lambda
|
88
|
+
read = lambda do |f|
|
89
|
+
zipped = f =~ /\.gz\Z/i
|
90
|
+
return unless ZLIB or not zipped
|
91
|
+
File.open(f, 'r') do |file|
|
92
|
+
handle = zipped ? Zlib::GzipReader.new(file) : file
|
93
|
+
#handle.each_line &parse # A little slower, but may be more memory efficient
|
94
|
+
handle.readlines.each &parse
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
Dir.glob(glob_str).each &read
|
99
|
+
entries
|
39
100
|
end
|
40
101
|
end
|
41
102
|
end
|
@@ -34,7 +34,7 @@ module DoubleAgent
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
# Returns the browser's name. If you provide
|
37
|
+
# Returns the browser's name. If you provide a user agent string as an argument,
|
38
38
|
# it will attempt to also return the major version number. E.g. "Firefox 4".
|
39
39
|
def browser(ua=nil)
|
40
40
|
if ua and (@version or @safe_version)
|
@@ -48,7 +48,7 @@ module DoubleAgent
|
|
48
48
|
# BrowserParser would return the Chromium BrowserParser. For browsers that are their
|
49
49
|
# own family (e.g. Firefox, IE) it will end up returning itself.
|
50
50
|
def family
|
51
|
-
BROWSERS[family_sym]
|
51
|
+
BROWSERS[@family_sym]
|
52
52
|
end
|
53
53
|
|
54
54
|
private
|
@@ -56,7 +56,7 @@ module DoubleAgent
|
|
56
56
|
# Attempts to parse and return the browser's version from a user agent string. Returns
|
57
57
|
# nil if nothing is found.
|
58
58
|
def version(ua)
|
59
|
-
if @safe_version
|
59
|
+
if @safe_version
|
60
60
|
ua.slice(@safe_version[0]).slice(@safe_version[1])
|
61
61
|
else
|
62
62
|
ua.slice(@version)
|
@@ -83,7 +83,7 @@ module DoubleAgent
|
|
83
83
|
# OSParser would return the GNU/Linux OSParser. For OSes that are their own
|
84
84
|
# family (e.g. OS X) it will end up returning itself.
|
85
85
|
def family
|
86
|
-
OSES[family_sym]
|
86
|
+
OSES[@family_sym]
|
87
87
|
end
|
88
88
|
end
|
89
89
|
|
data/lib/double_agent/stats.rb
CHANGED
@@ -1,49 +1,65 @@
|
|
1
1
|
module DoubleAgent
|
2
|
-
# True if running under less than Ruby 1.9
|
3
|
-
BAD_RUBY = RUBY_VERSION < '1.9.0'
|
4
|
-
|
5
|
-
if BAD_RUBY
|
6
|
-
require 'bigdecimal'
|
7
|
-
# If BAD_RUBY, this is used in lieu of the native round method
|
8
|
-
def self.better_round(f, n)
|
9
|
-
d = BigDecimal.new f.to_s
|
10
|
-
d.round(n).to_f
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
# For the given "things", returns the share of the group that each attr has.
|
15
|
-
#
|
16
|
-
# "things" is an array of objects who's classes "include DoubleAgent::Resource".
|
17
|
-
#
|
18
|
-
# "args" is one or more method symbols from DoubleAgent::Resource.
|
19
2
|
#
|
20
|
-
#
|
21
|
-
#
|
3
|
+
# The Stats module provides methods for determining browser and/or OS share
|
4
|
+
# for large numbers of DoubleAgent::Resource objects.
|
22
5
|
#
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# Example, Browser/OS share, asking for symbols back:
|
28
|
-
# DoubleAgent.percentages_for(server_log_entries, :browser_sym, :os_sym)
|
29
|
-
# [[:firefox, :windows_7, 50.4], [:chrome, :osx, 19.6], [:msie, :windows_xp, 15], [:safari, :osx, 10], [:other, :other, 5]]
|
30
|
-
def self.percentages_for(things, *args)
|
31
|
-
options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
|
32
|
-
p = {}
|
33
|
-
things.each do |h|
|
34
|
-
syms = args.map { |attr| h.send attr }
|
35
|
-
p[syms] ||= 0
|
36
|
-
p[syms] += 1
|
37
|
-
end
|
38
|
-
size = things.size.to_f
|
39
|
-
p = p.to_a
|
6
|
+
module Stats
|
7
|
+
# True if running under less than Ruby 1.9
|
8
|
+
BAD_RUBY = RUBY_VERSION < '1.9.0'
|
9
|
+
|
40
10
|
if BAD_RUBY
|
41
|
-
|
42
|
-
|
43
|
-
|
11
|
+
require 'bigdecimal'
|
12
|
+
# If BAD_RUBY, this is used in lieu of the native round method
|
13
|
+
def self.better_round(f, n)
|
14
|
+
d = BigDecimal.new f.to_s
|
15
|
+
d.round(n).to_f
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# For the given "things", returns the share of the group that each attr has.
|
20
|
+
#
|
21
|
+
# "things" is an array of objects whose classes mix in DoubleAgent::Resource.
|
22
|
+
#
|
23
|
+
# "args" is one or more method symbols from DoubleAgent::Resource.
|
24
|
+
#
|
25
|
+
# "args" may have, as its last member, :threshold => n, where n is the number of the lowest
|
26
|
+
# percentage you want returned.
|
27
|
+
#
|
28
|
+
# Returns an array of [attribute(s), percent of total, number of total]
|
29
|
+
#
|
30
|
+
# Example, Browser Family share:
|
31
|
+
#
|
32
|
+
# DoubleAgent::Stats.percentages_for(logins, :browser_family)
|
33
|
+
# [['Firefox', 50.4, 5040], ['Chrome', 19.6, 1960], ['Internet Explorer', 15, 1500], ['Safari', 10, 1000], ['Unknown', 5, 500]]
|
34
|
+
#
|
35
|
+
# Example, Browser/OS share, asking for symbols back:
|
36
|
+
#
|
37
|
+
# DoubleAgent::Stats.percentages_for(server_log_entries, :browser_sym, :os_sym)
|
38
|
+
# [[:firefox, :windows_7, 50.4, 5040], [:chrome, :osx, 19.6, 1960], [:msie, :windows_xp, 15, 1500], [:safari, :osx, 10, 1000], [:other, :other, 5, 500]]
|
39
|
+
def self.percentages_for(things, *args)
|
40
|
+
options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
|
41
|
+
results = {}
|
42
|
+
# Count each instance
|
43
|
+
things.each do |h|
|
44
|
+
syms = args.map { |attr| h.send attr }
|
45
|
+
results[syms] ||= 0
|
46
|
+
results[syms] += 1
|
47
|
+
end
|
48
|
+
size = things.size.to_f
|
49
|
+
results = results.to_a
|
50
|
+
# From the total, calculate the percentage held by each browser, OS, etc.
|
51
|
+
if BAD_RUBY
|
52
|
+
results.collect! { |k,n| [*k.<<(better_round(((n * 100) / size), 2)).<<(n)] }
|
53
|
+
else
|
54
|
+
# Ruby 1.9 syntax that blows up in Ruby 1.8
|
55
|
+
#results.collect! { |k,n| [*k, ((n * 100) / size).round(2), n] }
|
56
|
+
results.collect! { |k,n| [*k.<<(((n * 100) / size).round(2)).<<(n)] }
|
57
|
+
end
|
58
|
+
# Sort in descending order by count (largest share first)
|
59
|
+
results.sort! { |a,b| b.last <=> a.last }
|
60
|
+
# Reject percentages below a specified threshold
|
61
|
+
results.reject! { |a| a[-2] < options[:threshold] } if options[:threshold]
|
62
|
+
results
|
44
63
|
end
|
45
|
-
p.sort! { |a,b| b.last <=> a.last }
|
46
|
-
p.reject! { |a| a.last < options[:threshold] } if options[:threshold]
|
47
|
-
p
|
48
64
|
end
|
49
65
|
end
|
data/lib/double_agent.rb
CHANGED
data/spec/data_spec.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
DA = DoubleAgent
|
4
|
+
|
5
|
+
describe DoubleAgent do
|
6
|
+
# Internet Explorer
|
7
|
+
it 'should be Internet Explorer 10 on Windows 8' do
|
8
|
+
ua = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/5.0"
|
9
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 10 on Windows 8'
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should be Internet Explorer 9 on Windows 7' do
|
13
|
+
ua = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0"
|
14
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 9 on Windows 7'
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'should be Internet Explorer 8 on Windows Vista' do
|
18
|
+
ua = "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Win64; x64; Trident/5.0"
|
19
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 8 on Windows Vista'
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should be Internet Explorer 7 on Windows XP' do
|
23
|
+
ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.2; Win64; x64; Trident/5.0"
|
24
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should be Internet Explorer 7 on Windows XP' do
|
28
|
+
ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.1; Win64; x64; Trident/5.0"
|
29
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
|
30
|
+
end
|
31
|
+
|
32
|
+
# Chrome
|
33
|
+
it 'should be Chrome 12 on Windows XP' do
|
34
|
+
ua = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.25 (KHTML, like Gecko) Chrome/12.0.706.0 Safari/534.25"
|
35
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Windows XP'
|
36
|
+
end
|
37
|
+
|
38
|
+
# Chromium
|
39
|
+
it 'should be Chrome 12 on Ubuntu' do
|
40
|
+
ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.703.0 Chrome/12.0.703.0 Safari/534.24"
|
41
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Ubuntu'
|
42
|
+
end
|
43
|
+
|
44
|
+
# Android
|
45
|
+
it 'should be Android 2.3 on Android' do
|
46
|
+
ua = "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-tw; HTC_Pyramid Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"
|
47
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Android 2.3 on Android'
|
48
|
+
end
|
49
|
+
|
50
|
+
# Safari
|
51
|
+
it 'should be Safari 5 on OS X' do
|
52
|
+
ua = "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27"
|
53
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Safari 5 on OS X'
|
54
|
+
end
|
55
|
+
|
56
|
+
# Opera
|
57
|
+
it 'should be Opera 11 on GNU/Linux' do
|
58
|
+
ua = "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00"
|
59
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on GNU/Linux'
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'should be Opera 11 on Windows 7' do
|
63
|
+
ua = "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01"
|
64
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on Windows 7'
|
65
|
+
end
|
66
|
+
|
67
|
+
# Firefox
|
68
|
+
it 'should be Firefox 4 on GNU/Linux' do
|
69
|
+
ua = "Mozilla/5.0 (X11; U; Linux x86_64; pl-PL; rv:2.0) Gecko/20110307 Firefox/4.0"
|
70
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Firefox 4 on GNU/Linux'
|
71
|
+
end
|
72
|
+
|
73
|
+
# Epiphany
|
74
|
+
it 'should be Epiphany on GNU/Linux' do
|
75
|
+
ua = "Mozilla/5.0 (X11; U; Linux x86_64; fr-FR) AppleWebKit/534.7 (KHTML, like Gecko) Epiphany/2.30.6 Safari/534.7"
|
76
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Epiphany on GNU/Linux'
|
77
|
+
end
|
78
|
+
|
79
|
+
# Konqueror
|
80
|
+
it 'should be Konqueror on FreeBSD' do
|
81
|
+
ua = "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)"
|
82
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on FreeBSD'
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'should be Konqueror on Fedora' do
|
86
|
+
ua = "Mozilla/5.0 (compatible; Konqueror/4.4; Linux) KHTML/4.4.1 (like Gecko) Fedora/4.4.1-1.fc12"
|
87
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Fedora'
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'should be Konqueror on Slackware' do
|
91
|
+
ua = "Mozilla/5.0 (compatible; Konqueror/4.2; Linux) KHTML/4.2.4 (like Gecko) Slackware/13.0"
|
92
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Slackware'
|
93
|
+
end
|
94
|
+
|
95
|
+
# BlackBerry
|
96
|
+
it 'should be BlackBerry on BlackBerry' do
|
97
|
+
ua = "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; zh-TW) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.448 Mobile Safari/534.8+"
|
98
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'BlackBerry on BlackBerry'
|
99
|
+
end
|
100
|
+
end
|
data/spec/parser_spec.rb
CHANGED
@@ -1,100 +1,114 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/spec_helper'
|
2
2
|
|
3
|
-
DA = DoubleAgent
|
4
|
-
|
5
3
|
describe DoubleAgent do
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
4
|
+
context 'Parser' do
|
5
|
+
before do
|
6
|
+
@ua_string = 'Mozilla/5.0 (X11; Ubuntu Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0'
|
7
|
+
end
|
8
|
+
|
9
|
+
#browser
|
10
|
+
it 'returns Firefox 4 for browser' do
|
11
|
+
DoubleAgent.browser(@ua_string).should == 'Firefox 4'
|
12
|
+
end
|
13
|
+
it 'returns Unknown for browser' do
|
14
|
+
DoubleAgent.browser('froofroo').should == 'Unknown'
|
15
|
+
end
|
16
|
+
|
17
|
+
#browser_sym
|
18
|
+
it 'returns :firefox for browser_sym' do
|
19
|
+
DoubleAgent.browser_sym(@ua_string).should == :firefox
|
20
|
+
end
|
21
|
+
it 'returns :unknown for browser_sym' do
|
22
|
+
DoubleAgent.browser_sym('froofroo').should == :unknown
|
23
|
+
end
|
24
|
+
it 'returns :unknown for an empty browser_sym' do
|
25
|
+
DoubleAgent.browser_sym('').should == :unknown
|
26
|
+
end
|
27
|
+
it 'returns :unknown for a nil browser_sym' do
|
28
|
+
DoubleAgent.browser_sym(nil).should == :unknown
|
29
|
+
end
|
30
|
+
|
31
|
+
#browser_family
|
32
|
+
it 'returns Firefox for browser family' do
|
33
|
+
DoubleAgent.browser_family(@ua_string).should == 'Firefox'
|
34
|
+
end
|
35
|
+
|
36
|
+
#browser_family_sym
|
37
|
+
it 'returns :firefox for browser_family_sym' do
|
38
|
+
DoubleAgent.browser_family_sym(@ua_string).should == :firefox
|
39
|
+
end
|
40
|
+
it 'returns :unknown for an empty browser_family_sym' do
|
41
|
+
DoubleAgent.browser_family_sym('').should == :unknown
|
42
|
+
end
|
43
|
+
it 'returns :unknown for a nil browser_family_sym' do
|
44
|
+
DoubleAgent.browser_family_sym(nil).should == :unknown
|
45
|
+
end
|
46
|
+
|
47
|
+
#browser_icon
|
48
|
+
it 'returns :firefox for browser_sym' do
|
49
|
+
DoubleAgent.browser_icon(@ua_string).should == :firefox
|
50
|
+
end
|
51
|
+
it 'returns :unkown for an empty browser_sym' do
|
52
|
+
DoubleAgent.browser_icon('').should == :unknown
|
53
|
+
end
|
54
|
+
it 'returns :unkown for a nil browser_sym' do
|
55
|
+
DoubleAgent.browser_icon(nil).should == :unknown
|
56
|
+
end
|
57
|
+
|
58
|
+
#browser_family_icon
|
59
|
+
it 'returns :firefox for browser_family_sym' do
|
60
|
+
DoubleAgent.browser_family_icon(@ua_string).should == :firefox
|
61
|
+
end
|
62
|
+
it 'returns :unkown for an empty browser_family_sym' do
|
63
|
+
DoubleAgent.browser_family_icon('').should == :unknown
|
64
|
+
end
|
65
|
+
it 'returns :unkown for a nil browser_family_sym' do
|
66
|
+
DoubleAgent.browser_family_icon(nil).should == :unknown
|
67
|
+
end
|
68
|
+
|
69
|
+
#os
|
70
|
+
it 'returns Ubuntua for OS' do
|
71
|
+
DoubleAgent.os(@ua_string).should == 'Ubuntu'
|
72
|
+
end
|
73
|
+
it 'returns Unknowna for OS' do
|
74
|
+
DoubleAgent.os('froofroo').should == 'Unknown'
|
75
|
+
end
|
76
|
+
it 'returns Unknowna for OS' do
|
77
|
+
DoubleAgent.os('').should == 'Unknown'
|
78
|
+
end
|
79
|
+
|
80
|
+
#os_sym
|
81
|
+
it 'returns :ubuntu for os_sym' do
|
82
|
+
DoubleAgent.os_sym(@ua_string).should == :ubuntu
|
83
|
+
end
|
84
|
+
it 'returns :unknown for os_sym' do
|
85
|
+
DoubleAgent.os_sym('froofroo').should == :unknown
|
86
|
+
end
|
87
|
+
it 'returns :unknown for an empty os_sym' do
|
88
|
+
DoubleAgent.os_sym('').should == :unknown
|
89
|
+
end
|
90
|
+
it 'returns :unknown for a nil os_sym' do
|
91
|
+
DoubleAgent.os_sym(nil).should == :unknown
|
92
|
+
end
|
93
|
+
|
94
|
+
#os_family
|
95
|
+
it 'returns GNU/Linux OS family' do
|
96
|
+
DoubleAgent.os_family(@ua_string).should == 'GNU/Linux'
|
97
|
+
end
|
98
|
+
|
99
|
+
#os_family_sym
|
100
|
+
it 'returns :linux for os_family_sym' do
|
101
|
+
DoubleAgent.os_family_sym(@ua_string).should == :linux
|
102
|
+
end
|
103
|
+
|
104
|
+
#os_icon
|
105
|
+
it 'returns :ubuntu for os_sym' do
|
106
|
+
DoubleAgent.os_icon(@ua_string).should == :ubuntu
|
107
|
+
end
|
108
|
+
|
109
|
+
#os_family_icon
|
110
|
+
it 'returns :linux for os_family_sym' do
|
111
|
+
DoubleAgent.os_family_icon(@ua_string).should == :linux
|
112
|
+
end
|
99
113
|
end
|
100
114
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rspec'
|
2
|
-
require File.dirname(__FILE__) + '/../lib/double_agent/
|
2
|
+
require File.dirname(__FILE__) + '/../lib/double_agent/parser'
|
3
3
|
require File.dirname(__FILE__) + '/../lib/double_agent/resources'
|
4
4
|
require File.dirname(__FILE__) + '/../lib/double_agent/stats'
|
5
5
|
require File.dirname(__FILE__) + '/../lib/double_agent/logs'
|
@@ -7,3 +7,13 @@ require File.dirname(__FILE__) + '/../lib/double_agent/logs'
|
|
7
7
|
Rspec.configure do |c|
|
8
8
|
c.mock_with :rspec
|
9
9
|
end
|
10
|
+
|
11
|
+
module Kernel
|
12
|
+
def suppress_warnings
|
13
|
+
original_verbosity = $VERBOSE
|
14
|
+
$VERBOSE = nil
|
15
|
+
result = yield
|
16
|
+
$VERBOSE = original_verbosity
|
17
|
+
return result
|
18
|
+
end
|
19
|
+
end
|
data/spec/stats_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/spec_helper'
|
2
2
|
|
3
3
|
log_glob = File.dirname(__FILE__) + '/data/*.access.log*'
|
4
|
-
entries = DoubleAgent::
|
4
|
+
entries = DoubleAgent::Logs::entries(log_glob, :match => /^\d/)
|
5
5
|
|
6
6
|
describe DoubleAgent do
|
7
7
|
context 'Logs' do
|
@@ -10,27 +10,54 @@ describe DoubleAgent do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should have loaded n log entries' do
|
13
|
-
DoubleAgent::
|
13
|
+
DoubleAgent::Logs::entries(log_glob, :match => /^\d/, :ignore => %r{ /dashboard }).size.should == 44
|
14
|
+
end
|
15
|
+
|
16
|
+
context 'without zlib' do
|
17
|
+
it 'should have loaded n log entries' do
|
18
|
+
suppress_warnings { DoubleAgent::Logs::ZLIB = false }
|
19
|
+
plain_entries = DoubleAgent::Logs::entries(log_glob, :match => /^\d/)
|
20
|
+
suppress_warnings { DoubleAgent::Logs::ZLIB = true }
|
21
|
+
plain_entries.size.should == 17
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
context 'parsing other data' do
|
26
|
+
before :each do
|
27
|
+
@line = DoubleAgent::Logs::Entry.new '68.52.99.211 - - [04/May/2011:08:21:04 -0400] "GET / HTTP/1.1" 200 1312 "-" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.17) Gecko/20110420 Firefox/3.6.17"'
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'should parse the IP' do
|
31
|
+
@line.ip.should == '68.52.99.211'
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should parse datestamps' do
|
35
|
+
@line.on.should == Date.new(2011, 5, 4)
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'should parse timestamps' do
|
39
|
+
@line.at.should == DateTime.new(2011, 5, 4, 8, 21, 4, '-0400')
|
40
|
+
end
|
14
41
|
end
|
15
42
|
end
|
16
43
|
|
17
44
|
context 'Stats' do
|
18
45
|
it 'should calculate stats' do
|
19
|
-
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family
|
20
|
-
answer = [["Internet Explorer", "Windows", 42.55],
|
21
|
-
["Chromium", "GNU/Linux", 40.43],
|
22
|
-
["Firefox", "GNU/Linux", 10.64],
|
23
|
-
["Firefox", "OS X", 4.26],
|
24
|
-
["Safari", "OS X", 2.13]]
|
46
|
+
stats = DoubleAgent::Stats.percentages_for entries, :browser_family, :os_family
|
47
|
+
answer = [["Internet Explorer", "Windows", 42.55, 20],
|
48
|
+
["Chromium", "GNU/Linux", 40.43, 19],
|
49
|
+
["Firefox", "GNU/Linux", 10.64, 5],
|
50
|
+
["Firefox", "OS X", 4.26, 2],
|
51
|
+
["Safari", "OS X", 2.13, 1]]
|
25
52
|
stats.should == answer
|
26
53
|
end
|
27
54
|
|
28
55
|
it 'should ignore stats below the threshold' do
|
29
|
-
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
|
30
|
-
answer = [["Internet Explorer", "Windows", 42.55],
|
31
|
-
["Chromium", "GNU/Linux", 40.43],
|
32
|
-
["Firefox", "GNU/Linux", 10.64],
|
33
|
-
["Firefox", "OS X", 4.26]]
|
56
|
+
stats = DoubleAgent::Stats.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
|
57
|
+
answer = [["Internet Explorer", "Windows", 42.55, 20],
|
58
|
+
["Chromium", "GNU/Linux", 40.43, 19],
|
59
|
+
["Firefox", "GNU/Linux", 10.64, 5],
|
60
|
+
["Firefox", "OS X", 4.26, 2]]
|
34
61
|
stats.should == answer
|
35
62
|
end
|
36
63
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 1
|
8
7
|
- 2
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jordan Hollinger
|
@@ -14,11 +14,11 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-07-30 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
21
|
-
description: Browser User Agent string parser with
|
21
|
+
description: Browser User Agent string parser with resources, stats, and a log reader
|
22
22
|
email: jordan@jordanhollinger.com
|
23
23
|
executables: []
|
24
24
|
|
@@ -31,17 +31,17 @@ files:
|
|
31
31
|
- lib/double_agent.rb
|
32
32
|
- lib/double_agent/all.rb
|
33
33
|
- lib/double_agent/logs.rb
|
34
|
-
- lib/double_agent/core.rb
|
35
34
|
- lib/double_agent/resources.rb
|
36
35
|
- lib/double_agent/stats.rb
|
36
|
+
- lib/double_agent/parser.rb
|
37
37
|
- data/browsers.yml
|
38
38
|
- data/oses.yml
|
39
39
|
- spec/spec_helper.rb
|
40
|
-
- spec/core_spec.rb
|
41
40
|
- spec/data/httpd.access.log.1.gz
|
42
41
|
- spec/data/httpd.access.log
|
43
42
|
- spec/stats_spec.rb
|
44
43
|
- spec/resources_spec.rb
|
44
|
+
- spec/data_spec.rb
|
45
45
|
- spec/parser_spec.rb
|
46
46
|
- README.rdoc
|
47
47
|
- LICENSE
|
data/spec/core_spec.rb
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
-
|
3
|
-
describe DoubleAgent do
|
4
|
-
context 'Core' do
|
5
|
-
before do
|
6
|
-
@ua_string = 'Mozilla/5.0 (X11; Ubuntu Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0'
|
7
|
-
end
|
8
|
-
|
9
|
-
#browser
|
10
|
-
it 'returns Firefox 4 for browser' do
|
11
|
-
DoubleAgent.browser(@ua_string).should == 'Firefox 4'
|
12
|
-
end
|
13
|
-
it 'returns Unknown for browser' do
|
14
|
-
DoubleAgent.browser('froofroo').should == 'Unknown'
|
15
|
-
end
|
16
|
-
|
17
|
-
#browser_sym
|
18
|
-
it 'returns :firefox for browser_sym' do
|
19
|
-
DoubleAgent.browser_sym(@ua_string).should == :firefox
|
20
|
-
end
|
21
|
-
it 'returns :unknown for browser_sym' do
|
22
|
-
DoubleAgent.browser_sym('froofroo').should == :unknown
|
23
|
-
end
|
24
|
-
it 'returns :unknown for an empty browser_sym' do
|
25
|
-
DoubleAgent.browser_sym('').should == :unknown
|
26
|
-
end
|
27
|
-
it 'returns :unknown for a nil browser_sym' do
|
28
|
-
DoubleAgent.browser_sym(nil).should == :unknown
|
29
|
-
end
|
30
|
-
|
31
|
-
#browser_family
|
32
|
-
it 'returns Firefox for browser family' do
|
33
|
-
DoubleAgent.browser_family(@ua_string).should == 'Firefox'
|
34
|
-
end
|
35
|
-
|
36
|
-
#browser_family_sym
|
37
|
-
it 'returns :firefox for browser_family_sym' do
|
38
|
-
DoubleAgent.browser_family_sym(@ua_string).should == :firefox
|
39
|
-
end
|
40
|
-
it 'returns :unknown for an empty browser_family_sym' do
|
41
|
-
DoubleAgent.browser_family_sym('').should == :unknown
|
42
|
-
end
|
43
|
-
it 'returns :unknown for a nil browser_family_sym' do
|
44
|
-
DoubleAgent.browser_family_sym(nil).should == :unknown
|
45
|
-
end
|
46
|
-
|
47
|
-
#browser_icon
|
48
|
-
it 'returns :firefox for browser_sym' do
|
49
|
-
DoubleAgent.browser_icon(@ua_string).should == :firefox
|
50
|
-
end
|
51
|
-
it 'returns :unkown for an empty browser_sym' do
|
52
|
-
DoubleAgent.browser_icon('').should == :unknown
|
53
|
-
end
|
54
|
-
it 'returns :unkown for a nil browser_sym' do
|
55
|
-
DoubleAgent.browser_icon(nil).should == :unknown
|
56
|
-
end
|
57
|
-
|
58
|
-
#browser_family_icon
|
59
|
-
it 'returns :firefox for browser_family_sym' do
|
60
|
-
DoubleAgent.browser_family_icon(@ua_string).should == :firefox
|
61
|
-
end
|
62
|
-
it 'returns :unkown for an empty browser_family_sym' do
|
63
|
-
DoubleAgent.browser_family_icon('').should == :unknown
|
64
|
-
end
|
65
|
-
it 'returns :unkown for a nil browser_family_sym' do
|
66
|
-
DoubleAgent.browser_family_icon(nil).should == :unknown
|
67
|
-
end
|
68
|
-
|
69
|
-
#os
|
70
|
-
it 'returns Ubuntua for OS' do
|
71
|
-
DoubleAgent.os(@ua_string).should == 'Ubuntu'
|
72
|
-
end
|
73
|
-
it 'returns Unknowna for OS' do
|
74
|
-
DoubleAgent.os('froofroo').should == 'Unknown'
|
75
|
-
end
|
76
|
-
it 'returns Unknowna for OS' do
|
77
|
-
DoubleAgent.os('').should == 'Unknown'
|
78
|
-
end
|
79
|
-
|
80
|
-
#os_sym
|
81
|
-
it 'returns :ubuntu for os_sym' do
|
82
|
-
DoubleAgent.os_sym(@ua_string).should == :ubuntu
|
83
|
-
end
|
84
|
-
it 'returns :unknown for os_sym' do
|
85
|
-
DoubleAgent.os_sym('froofroo').should == :unknown
|
86
|
-
end
|
87
|
-
it 'returns :unknown for an empty os_sym' do
|
88
|
-
DoubleAgent.os_sym('').should == :unknown
|
89
|
-
end
|
90
|
-
it 'returns :unknown for a nil os_sym' do
|
91
|
-
DoubleAgent.os_sym(nil).should == :unknown
|
92
|
-
end
|
93
|
-
|
94
|
-
#os_family
|
95
|
-
it 'returns GNU/Linux OS family' do
|
96
|
-
DoubleAgent.os_family(@ua_string).should == 'GNU/Linux'
|
97
|
-
end
|
98
|
-
|
99
|
-
#os_family_sym
|
100
|
-
it 'returns :linux for os_family_sym' do
|
101
|
-
DoubleAgent.os_family_sym(@ua_string).should == :linux
|
102
|
-
end
|
103
|
-
|
104
|
-
#os_icon
|
105
|
-
it 'returns :ubuntu for os_sym' do
|
106
|
-
DoubleAgent.os_icon(@ua_string).should == :ubuntu
|
107
|
-
end
|
108
|
-
|
109
|
-
#os_family_icon
|
110
|
-
it 'returns :linux for os_family_sym' do
|
111
|
-
DoubleAgent.os_family_icon(@ua_string).should == :linux
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|