double_agent 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -0
- data/README.rdoc +26 -24
- data/lib/double_agent/all.rb +3 -1
- data/lib/double_agent/logs.rb +94 -33
- data/lib/double_agent/{core.rb → parser.rb} +4 -4
- data/lib/double_agent/stats.rb +58 -42
- data/lib/double_agent.rb +1 -2
- data/spec/data_spec.rb +100 -0
- data/spec/parser_spec.rb +109 -95
- data/spec/spec_helper.rb +11 -1
- data/spec/stats_spec.rb +40 -13
- metadata +6 -6
- data/spec/core_spec.rb +0 -114
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
== Release 0.2.0 (July 30, 2011)
|
2
|
+
|
3
|
+
* Separate code into real modules
|
4
|
+
|
5
|
+
* Only load parser and resources by default
|
6
|
+
|
7
|
+
* Add subtotals to percentages_for
|
8
|
+
|
9
|
+
* Add more data parsing methods to DoubleAgent::Logs::Entry objects
|
10
|
+
|
11
|
+
* Various optimizations and stuff I forgot about
|
12
|
+
|
1
13
|
== Release 0.1.2 (June 30, 2011)
|
2
14
|
|
3
15
|
* Bugfix to #browser_sym and #os_sym returning nil on an empty or nil user agent. Should return :unknown.
|
data/README.rdoc
CHANGED
@@ -9,20 +9,17 @@ agent strings. It is designed for parsing large sets for review or analysis.
|
|
9
9
|
|
10
10
|
== Loading
|
11
11
|
|
12
|
-
Double Agent is broken up into
|
12
|
+
Double Agent is broken up into modules - Parser, Resources, Stats and Logs.
|
13
13
|
|
14
|
-
# Load the
|
14
|
+
# Load the core modules (Parser, Resources)
|
15
15
|
require 'double_agent'
|
16
16
|
|
17
|
-
# Load
|
18
|
-
require 'double_agent/
|
17
|
+
# Load modules individually
|
18
|
+
require 'double_agent/parser|resources|stats|logs|all'
|
19
19
|
|
20
|
-
|
21
|
-
require 'double_agent/all'
|
20
|
+
= Parser
|
22
21
|
|
23
|
-
=
|
24
|
-
|
25
|
-
ua_string = "pretent I'm a user agent string for Chrome on Ubuntu"
|
22
|
+
ua_string = "I'm a user agent string for Chrome 11 on Ubuntu"
|
26
23
|
|
27
24
|
DoubleAgent.browser(ua_string)
|
28
25
|
=> "Chrome 11"
|
@@ -41,7 +38,7 @@ See the DoubleAgent module for more.
|
|
41
38
|
= Resources
|
42
39
|
|
43
40
|
DoubleAgent::Resource is a mix-in for objects with a user_agent method or attribute.
|
44
|
-
It gives that class's objects all of the above methods.
|
41
|
+
It gives that class's objects all of the above methods and more.
|
45
42
|
|
46
43
|
class Login
|
47
44
|
include DoubleAgent::Resource
|
@@ -61,29 +58,34 @@ It gives that class's objects all of the above methods.
|
|
61
58
|
|
62
59
|
= Stats
|
63
60
|
|
64
|
-
Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::percentages_for.
|
61
|
+
Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::Stats::percentages_for.
|
65
62
|
|
66
63
|
logins = Login.all
|
67
|
-
|
64
|
+
p logins.size
|
65
|
+
=> 1000
|
68
66
|
|
67
|
+
stats = DoubleAgent::Stats.percentages_for(logins, :browser_family, :os_family)
|
69
68
|
p stats
|
70
|
-
=> [["Firefox", "Windows", 50.0], ["Internet Explorer", "Windows", 20.0], ["Safari", "OS X", 20.0], ["Firefox", "GNU/Linux", 10.0]]
|
69
|
+
=> [["Firefox", "Windows", 50.0, 500], ["Internet Explorer", "Windows", 20.0, 200], ["Safari", "OS X", 20.0, 200], ["Firefox", "GNU/Linux", 10.0, 100]]
|
71
70
|
|
72
|
-
stats.each do |browser_family, os_family, percent|
|
73
|
-
puts "#{browser_family} on #{os_family} - #{percent}%"
|
71
|
+
stats.each do |browser_family, os_family, percent, real_num|
|
72
|
+
puts "#{browser_family} on #{os_family} - #{percent}% (#{real_num} hits)"
|
74
73
|
end
|
74
|
+
=> "Firefox on Windows - 50% (500 hits)"
|
75
|
+
=> "Internet Explorer on Windows - 20% (200 hits)"
|
76
|
+
=> "Safari on OS X - 20% (200 hits)"
|
77
|
+
=> "Firefox on GNU/Linux - 10% (100 hits)"
|
75
78
|
|
76
79
|
= Logs
|
77
80
|
|
78
|
-
DoubleAgent::
|
79
|
-
object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::
|
80
|
-
can easily calculate browser and/or OS
|
81
|
-
|
82
|
-
require 'double_agent'
|
83
|
-
require 'double_agent/logs'
|
81
|
+
DoubleAgent::Logs::entries parses through Apache and Nginx access logs, instantiating each log line into a DoubleAgent::Logs::Entry
|
82
|
+
object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::Logs::Entry class mixes in DoubleAgent::Resource, you
|
83
|
+
can easily calculate browser and/or OS share on your site.
|
84
84
|
|
85
|
-
entries = DoubleAgent.
|
86
|
-
stats = DoubleAgent.percentages_for(entries, :browser)
|
85
|
+
entries = DoubleAgent::Logs.entries("/var/log/nginx/my-site.access.log*")
|
86
|
+
stats = DoubleAgent::Stats.percentages_for(entries, :browser)
|
87
87
|
|
88
88
|
p stats
|
89
|
-
=> [["Firefox 4", 20.0], ["Internet Explorer 8", 18.0], ...]
|
89
|
+
=> [["Firefox 4", 20.0, 650], ["Internet Explorer 8", 18.0, 587], ...]
|
90
|
+
|
91
|
+
Check out the DoubleAgent::Logs::Entry class for more methods besides user_agent.
|
data/lib/double_agent/all.rb
CHANGED
data/lib/double_agent/logs.rb
CHANGED
@@ -1,41 +1,102 @@
|
|
1
|
-
require '
|
1
|
+
require 'date'
|
2
2
|
|
3
3
|
module DoubleAgent
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
4
|
+
#
|
5
|
+
# The Logs module contains methods and classes for parsing user agent strings
|
6
|
+
# from Apache-style logs (includes default Nginx log format). Gzipped logs
|
7
|
+
# are also supported.
|
8
|
+
#
|
9
|
+
module Logs
|
10
|
+
begin
|
11
|
+
require 'zlib'
|
12
|
+
ZLIB = true
|
13
|
+
rescue LoadError
|
14
|
+
$stderr.puts "Zlib not available for DoubleAgent::Logs; gzipped log files will be skipped."
|
15
|
+
ZLIB = false
|
16
|
+
end
|
17
|
+
|
18
|
+
# This class represents a line in an Apache or Nginx access log.
|
19
|
+
# The user agent string is parsed out and available through the
|
20
|
+
# user_agent attribute, making it available to the mixed-in DoubleAgent::Resource methods.
|
21
|
+
# Datestamps and Timestamps may also be retrieved for each instance, using the #on and #at methods, respectively.
|
22
|
+
|
23
|
+
class Entry
|
24
|
+
include DoubleAgent::Resource
|
25
|
+
# Returns the user agent string
|
26
|
+
attr_reader :user_agent, :line
|
27
|
+
|
28
|
+
# Regular expression for pulling a user agent string out of a log entry. It is rather imprecise
|
29
|
+
# only for efficiency's sake.
|
30
|
+
USER_AGENT_REGEXP = /" ".+$/
|
31
|
+
|
32
|
+
# Regexp for parsing an IP address
|
33
|
+
IP_REGEXP = /^[0-9a-z\.:]+/
|
34
|
+
|
35
|
+
# Regex for parsing the date out of the log line
|
36
|
+
DATESTAMP_REGEXP = %r{[0-9]+/[a-z]+/[0-9]+:}i
|
37
|
+
# strptime format string for parsing the DATESTAMP_REGEXP match into a Date object
|
38
|
+
DATESTAMP_FORMAT = '%d/%B/%Y:'
|
39
|
+
|
40
|
+
# Regex for parsing the datetime out of the log line
|
41
|
+
TIMESTAMP_REGEXP = %r{[0-9]+/[a-z]+/[0-9]+:[0-9]+:[0-9]+:[0-9]+ (-|\+)[0-9]+}i
|
42
|
+
# strptime format string for parsing the TIMESTAMP_REGEXP match into a DateTime object
|
43
|
+
TIMESTAMP_FORMAT = '%d/%B/%Y:%H:%M:%S %z'
|
44
|
+
|
45
|
+
# Initializes a new Entry object. An Apache or Nginx log line should be
|
46
|
+
# passed to it.
|
47
|
+
def initialize(line)
|
48
|
+
@line = line
|
49
|
+
@user_agent = line.slice(USER_AGENT_REGEXP)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns the IP address the hit originated from
|
53
|
+
def ip
|
54
|
+
@line.slice(IP_REGEXP)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Returns the Date the hit occurred on
|
58
|
+
def on
|
59
|
+
date_str = @line.slice(DATESTAMP_REGEXP)
|
60
|
+
date_str ? Date.strptime(date_str, DATESTAMP_FORMAT) : nil
|
61
|
+
end
|
62
|
+
|
63
|
+
# Returns the DateTime the hit occurred at
|
64
|
+
def at
|
65
|
+
datetime_str = @line.slice(TIMESTAMP_REGEXP)
|
66
|
+
datetime_str ? DateTime.strptime(datetime_str, TIMESTAMP_FORMAT) : nil
|
19
67
|
end
|
20
68
|
end
|
21
|
-
entries
|
22
|
-
end
|
23
69
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
70
|
+
# Accepts a glob path like /var/logs/apache/my-site.access.log*,
|
71
|
+
# parses all matching files into an array of Entry objects, and returns them.
|
72
|
+
# Gzipped log files are parsed by Zlib.
|
73
|
+
#
|
74
|
+
# Options:
|
75
|
+
#
|
76
|
+
# :match A regular expression. Only lines which match this will be returned.
|
77
|
+
# :ignore A regular expression. Any lines which match this will be ignored.
|
78
|
+
def self.entries(glob_str, options={})
|
79
|
+
match, ignore = options[:match], options[:ignore]
|
80
|
+
entries = []
|
81
|
+
|
82
|
+
# Define the parse lambda
|
83
|
+
parse = (match or ignore) \
|
84
|
+
? lambda { |line| entries << Entry.new(line) unless (match and line !~ match) or (ignore and line =~ ignore) } \
|
85
|
+
: lambda { |line| entries << Entry.new(line) }
|
86
|
+
|
87
|
+
# Define the read lambda
|
88
|
+
read = lambda do |f|
|
89
|
+
zipped = f =~ /\.gz\Z/i
|
90
|
+
return unless ZLIB or not zipped
|
91
|
+
File.open(f, 'r') do |file|
|
92
|
+
handle = zipped ? Zlib::GzipReader.new(file) : file
|
93
|
+
#handle.each_line &parse # A little slower, but may be more memory efficient
|
94
|
+
handle.readlines.each &parse
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
Dir.glob(glob_str).each &read
|
99
|
+
entries
|
39
100
|
end
|
40
101
|
end
|
41
102
|
end
|
@@ -34,7 +34,7 @@ module DoubleAgent
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
# Returns the browser's name. If you provide
|
37
|
+
# Returns the browser's name. If you provide a user agent string as an argument,
|
38
38
|
# it will attempt to also return the major version number. E.g. "Firefox 4".
|
39
39
|
def browser(ua=nil)
|
40
40
|
if ua and (@version or @safe_version)
|
@@ -48,7 +48,7 @@ module DoubleAgent
|
|
48
48
|
# BrowserParser would return the Chromium BrowserParser. For browsers that are their
|
49
49
|
# own family (e.g. Firefox, IE) it will end up returning itself.
|
50
50
|
def family
|
51
|
-
BROWSERS[family_sym]
|
51
|
+
BROWSERS[@family_sym]
|
52
52
|
end
|
53
53
|
|
54
54
|
private
|
@@ -56,7 +56,7 @@ module DoubleAgent
|
|
56
56
|
# Attempts to parse and return the browser's version from a user agent string. Returns
|
57
57
|
# nil if nothing is found.
|
58
58
|
def version(ua)
|
59
|
-
if @safe_version
|
59
|
+
if @safe_version
|
60
60
|
ua.slice(@safe_version[0]).slice(@safe_version[1])
|
61
61
|
else
|
62
62
|
ua.slice(@version)
|
@@ -83,7 +83,7 @@ module DoubleAgent
|
|
83
83
|
# OSParser would return the GNU/Linux OSParser. For OSes that are their own
|
84
84
|
# family (e.g. OS X) it will end up returning itself.
|
85
85
|
def family
|
86
|
-
OSES[family_sym]
|
86
|
+
OSES[@family_sym]
|
87
87
|
end
|
88
88
|
end
|
89
89
|
|
data/lib/double_agent/stats.rb
CHANGED
@@ -1,49 +1,65 @@
|
|
1
1
|
module DoubleAgent
|
2
|
-
# True if running under less than Ruby 1.9
|
3
|
-
BAD_RUBY = RUBY_VERSION < '1.9.0'
|
4
|
-
|
5
|
-
if BAD_RUBY
|
6
|
-
require 'bigdecimal'
|
7
|
-
# If BAD_RUBY, this is used in lieu of the native round method
|
8
|
-
def self.better_round(f, n)
|
9
|
-
d = BigDecimal.new f.to_s
|
10
|
-
d.round(n).to_f
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
# For the given "things", returns the share of the group that each attr has.
|
15
|
-
#
|
16
|
-
# "things" is an array of objects who's classes "include DoubleAgent::Resource".
|
17
|
-
#
|
18
|
-
# "args" is one or more method symbols from DoubleAgent::Resource.
|
19
2
|
#
|
20
|
-
#
|
21
|
-
#
|
3
|
+
# The Stats module provides methods for determining browser and/or OS share
|
4
|
+
# for large numbers of DoubleAgent::Resource objects.
|
22
5
|
#
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# Example, Browser/OS share, asking for symbols back:
|
28
|
-
# DoubleAgent.percentages_for(server_log_entries, :browser_sym, :os_sym)
|
29
|
-
# [[:firefox, :windows_7, 50.4], [:chrome, :osx, 19.6], [:msie, :windows_xp, 15], [:safari, :osx, 10], [:other, :other, 5]]
|
30
|
-
def self.percentages_for(things, *args)
|
31
|
-
options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
|
32
|
-
p = {}
|
33
|
-
things.each do |h|
|
34
|
-
syms = args.map { |attr| h.send attr }
|
35
|
-
p[syms] ||= 0
|
36
|
-
p[syms] += 1
|
37
|
-
end
|
38
|
-
size = things.size.to_f
|
39
|
-
p = p.to_a
|
6
|
+
module Stats
|
7
|
+
# True if running under less than Ruby 1.9
|
8
|
+
BAD_RUBY = RUBY_VERSION < '1.9.0'
|
9
|
+
|
40
10
|
if BAD_RUBY
|
41
|
-
|
42
|
-
|
43
|
-
|
11
|
+
require 'bigdecimal'
|
12
|
+
# If BAD_RUBY, this is used in lieu of the native round method
|
13
|
+
def self.better_round(f, n)
|
14
|
+
d = BigDecimal.new f.to_s
|
15
|
+
d.round(n).to_f
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# For the given "things", returns the share of the group that each attr has.
|
20
|
+
#
|
21
|
+
# "things" is an array of objects whose classes mix in DoubleAgent::Resource.
|
22
|
+
#
|
23
|
+
# "args" is one or more method symbols from DoubleAgent::Resource.
|
24
|
+
#
|
25
|
+
# "args" may have, as its last member, :threshold => n, where n is the number of the lowest
|
26
|
+
# percentage you want returned.
|
27
|
+
#
|
28
|
+
# Returns an array of [attribute(s), percent of total, number of total]
|
29
|
+
#
|
30
|
+
# Example, Browser Family share:
|
31
|
+
#
|
32
|
+
# DoubleAgent::Stats.percentages_for(logins, :browser_family)
|
33
|
+
# [['Firefox', 50.4, 5040], ['Chrome', 19.6, 1960], ['Internet Explorer', 15, 1500], ['Safari', 10, 1000], ['Unknown', 5, 500]]
|
34
|
+
#
|
35
|
+
# Example, Browser/OS share, asking for symbols back:
|
36
|
+
#
|
37
|
+
# DoubleAgent::Stats.percentages_for(server_log_entries, :browser_sym, :os_sym)
|
38
|
+
# [[:firefox, :windows_7, 50.4, 5040], [:chrome, :osx, 19.6, 1960], [:msie, :windows_xp, 15, 1500], [:safari, :osx, 10, 1000], [:other, :other, 5, 500]]
|
39
|
+
def self.percentages_for(things, *args)
|
40
|
+
options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
|
41
|
+
results = {}
|
42
|
+
# Count each instance
|
43
|
+
things.each do |h|
|
44
|
+
syms = args.map { |attr| h.send attr }
|
45
|
+
results[syms] ||= 0
|
46
|
+
results[syms] += 1
|
47
|
+
end
|
48
|
+
size = things.size.to_f
|
49
|
+
results = results.to_a
|
50
|
+
# From the total, calculate the percentage held by each browser, OS, etc.
|
51
|
+
if BAD_RUBY
|
52
|
+
results.collect! { |k,n| [*k.<<(better_round(((n * 100) / size), 2)).<<(n)] }
|
53
|
+
else
|
54
|
+
# Ruby 1.9 syntax that blows up in Ruby 1.8
|
55
|
+
#results.collect! { |k,n| [*k, ((n * 100) / size).round(2), n] }
|
56
|
+
results.collect! { |k,n| [*k.<<(((n * 100) / size).round(2)).<<(n)] }
|
57
|
+
end
|
58
|
+
# Sort in descending order (largest count first)
|
59
|
+
results.sort! { |a,b| b.last <=> a.last }
|
60
|
+
# Reject percentages below a specified threshold
|
61
|
+
results.reject! { |a| a[-2] < options[:threshold] } if options[:threshold]
|
62
|
+
results
|
44
63
|
end
|
45
|
-
p.sort! { |a,b| b.last <=> a.last }
|
46
|
-
p.reject! { |a| a.last < options[:threshold] } if options[:threshold]
|
47
|
-
p
|
48
64
|
end
|
49
65
|
end
|
data/lib/double_agent.rb
CHANGED
data/spec/data_spec.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
DA = DoubleAgent
|
4
|
+
|
5
|
+
describe DoubleAgent do
|
6
|
+
# Internet Explorer
|
7
|
+
it 'should be Internet Explorer 10 on Windows 8' do
|
8
|
+
ua = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/5.0"
|
9
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 10 on Windows 8'
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should be Internet Explorer 9 on Windows 7' do
|
13
|
+
ua = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0"
|
14
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 9 on Windows 7'
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'should be Internet Explorer 8 on Windows Vista' do
|
18
|
+
ua = "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Win64; x64; Trident/5.0"
|
19
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 8 on Windows Vista'
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should be Internet Explorer 7 on Windows XP' do
|
23
|
+
ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.2; Win64; x64; Trident/5.0"
|
24
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should be Internet Explorer 7 on Windows XP' do
|
28
|
+
ua = "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.1; Win64; x64; Trident/5.0"
|
29
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Internet Explorer 7 on Windows XP'
|
30
|
+
end
|
31
|
+
|
32
|
+
# Chrome
|
33
|
+
it 'should be Chrome 12 on Windows XP' do
|
34
|
+
ua = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.25 (KHTML, like Gecko) Chrome/12.0.706.0 Safari/534.25"
|
35
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Windows XP'
|
36
|
+
end
|
37
|
+
|
38
|
+
# Chromium
|
39
|
+
it 'should be Chrome 12 on Ubuntu' do
|
40
|
+
ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.703.0 Chrome/12.0.703.0 Safari/534.24"
|
41
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Chrome 12 on Ubuntu'
|
42
|
+
end
|
43
|
+
|
44
|
+
# Android
|
45
|
+
it 'should be Android 2.3 on Android' do
|
46
|
+
ua = "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-tw; HTC_Pyramid Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"
|
47
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Android 2.3 on Android'
|
48
|
+
end
|
49
|
+
|
50
|
+
# Safari
|
51
|
+
it 'should be Safari 5 on OS X' do
|
52
|
+
ua = "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27"
|
53
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Safari 5 on OS X'
|
54
|
+
end
|
55
|
+
|
56
|
+
# Opera
|
57
|
+
it 'should be Opera 11 on GNU/Linux' do
|
58
|
+
ua = "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00"
|
59
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on GNU/Linux'
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'should be Opera 11 on Windows 7' do
|
63
|
+
ua = "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01"
|
64
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Opera 11 on Windows 7'
|
65
|
+
end
|
66
|
+
|
67
|
+
# Firefox
|
68
|
+
it 'should be Firefox 4 on GNU/Linux' do
|
69
|
+
ua = "Mozilla/5.0 (X11; U; Linux x86_64; pl-PL; rv:2.0) Gecko/20110307 Firefox/4.0"
|
70
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Firefox 4 on GNU/Linux'
|
71
|
+
end
|
72
|
+
|
73
|
+
# Epiphany
|
74
|
+
it 'should be Epiphany on GNU/Linux' do
|
75
|
+
ua = "Mozilla/5.0 (X11; U; Linux x86_64; fr-FR) AppleWebKit/534.7 (KHTML, like Gecko) Epiphany/2.30.6 Safari/534.7"
|
76
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Epiphany on GNU/Linux'
|
77
|
+
end
|
78
|
+
|
79
|
+
# Konqueror
|
80
|
+
it 'should be Konqueror on FreeBSD' do
|
81
|
+
ua = "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)"
|
82
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on FreeBSD'
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'should be Konqueror on Fedora' do
|
86
|
+
ua = "Mozilla/5.0 (compatible; Konqueror/4.4; Linux) KHTML/4.4.1 (like Gecko) Fedora/4.4.1-1.fc12"
|
87
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Fedora'
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'should be Konqueror on Slackware' do
|
91
|
+
ua = "Mozilla/5.0 (compatible; Konqueror/4.2; Linux) KHTML/4.2.4 (like Gecko) Slackware/13.0"
|
92
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'Konqueror on Slackware'
|
93
|
+
end
|
94
|
+
|
95
|
+
# BlackBerry
|
96
|
+
it 'should be BlackBerry on BlackBerry' do
|
97
|
+
ua = "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; zh-TW) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.448 Mobile Safari/534.8+"
|
98
|
+
"#{DA.browser ua} on #{DA.os ua}".should == 'BlackBerry on BlackBerry'
|
99
|
+
end
|
100
|
+
end
|
data/spec/parser_spec.rb
CHANGED
@@ -1,100 +1,114 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/spec_helper'
|
2
2
|
|
3
|
-
DA = DoubleAgent
|
4
|
-
|
5
3
|
describe DoubleAgent do
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
4
|
+
context 'Parser' do
|
5
|
+
before do
|
6
|
+
@ua_string = 'Mozilla/5.0 (X11; Ubuntu Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0'
|
7
|
+
end
|
8
|
+
|
9
|
+
#browser
|
10
|
+
it 'returns Firefox 4 for browser' do
|
11
|
+
DoubleAgent.browser(@ua_string).should == 'Firefox 4'
|
12
|
+
end
|
13
|
+
it 'returns Unknown for browser' do
|
14
|
+
DoubleAgent.browser('froofroo').should == 'Unknown'
|
15
|
+
end
|
16
|
+
|
17
|
+
#browser_sym
|
18
|
+
it 'returns :firefox for browser_sym' do
|
19
|
+
DoubleAgent.browser_sym(@ua_string).should == :firefox
|
20
|
+
end
|
21
|
+
it 'returns :unknown for browser_sym' do
|
22
|
+
DoubleAgent.browser_sym('froofroo').should == :unknown
|
23
|
+
end
|
24
|
+
it 'returns :unknown for an empty browser_sym' do
|
25
|
+
DoubleAgent.browser_sym('').should == :unknown
|
26
|
+
end
|
27
|
+
it 'returns :unknown for a nil browser_sym' do
|
28
|
+
DoubleAgent.browser_sym(nil).should == :unknown
|
29
|
+
end
|
30
|
+
|
31
|
+
#browser_family
|
32
|
+
it 'returns Firefox for browser family' do
|
33
|
+
DoubleAgent.browser_family(@ua_string).should == 'Firefox'
|
34
|
+
end
|
35
|
+
|
36
|
+
#browser_family_sym
|
37
|
+
it 'returns :firefox for browser_family_sym' do
|
38
|
+
DoubleAgent.browser_family_sym(@ua_string).should == :firefox
|
39
|
+
end
|
40
|
+
it 'returns :unknown for an empty browser_family_sym' do
|
41
|
+
DoubleAgent.browser_family_sym('').should == :unknown
|
42
|
+
end
|
43
|
+
it 'returns :unknown for a nil browser_family_sym' do
|
44
|
+
DoubleAgent.browser_family_sym(nil).should == :unknown
|
45
|
+
end
|
46
|
+
|
47
|
+
#browser_icon
|
48
|
+
it 'returns :firefox for browser_sym' do
|
49
|
+
DoubleAgent.browser_icon(@ua_string).should == :firefox
|
50
|
+
end
|
51
|
+
it 'returns :unkown for an empty browser_sym' do
|
52
|
+
DoubleAgent.browser_icon('').should == :unknown
|
53
|
+
end
|
54
|
+
it 'returns :unkown for a nil browser_sym' do
|
55
|
+
DoubleAgent.browser_icon(nil).should == :unknown
|
56
|
+
end
|
57
|
+
|
58
|
+
#browser_family_icon
|
59
|
+
it 'returns :firefox for browser_family_sym' do
|
60
|
+
DoubleAgent.browser_family_icon(@ua_string).should == :firefox
|
61
|
+
end
|
62
|
+
it 'returns :unkown for an empty browser_family_sym' do
|
63
|
+
DoubleAgent.browser_family_icon('').should == :unknown
|
64
|
+
end
|
65
|
+
it 'returns :unkown for a nil browser_family_sym' do
|
66
|
+
DoubleAgent.browser_family_icon(nil).should == :unknown
|
67
|
+
end
|
68
|
+
|
69
|
+
#os
|
70
|
+
it 'returns Ubuntua for OS' do
|
71
|
+
DoubleAgent.os(@ua_string).should == 'Ubuntu'
|
72
|
+
end
|
73
|
+
it 'returns Unknowna for OS' do
|
74
|
+
DoubleAgent.os('froofroo').should == 'Unknown'
|
75
|
+
end
|
76
|
+
it 'returns Unknowna for OS' do
|
77
|
+
DoubleAgent.os('').should == 'Unknown'
|
78
|
+
end
|
79
|
+
|
80
|
+
#os_sym
|
81
|
+
it 'returns :ubuntu for os_sym' do
|
82
|
+
DoubleAgent.os_sym(@ua_string).should == :ubuntu
|
83
|
+
end
|
84
|
+
it 'returns :unknown for os_sym' do
|
85
|
+
DoubleAgent.os_sym('froofroo').should == :unknown
|
86
|
+
end
|
87
|
+
it 'returns :unknown for an empty os_sym' do
|
88
|
+
DoubleAgent.os_sym('').should == :unknown
|
89
|
+
end
|
90
|
+
it 'returns :unknown for a nil os_sym' do
|
91
|
+
DoubleAgent.os_sym(nil).should == :unknown
|
92
|
+
end
|
93
|
+
|
94
|
+
#os_family
|
95
|
+
it 'returns GNU/Linux OS family' do
|
96
|
+
DoubleAgent.os_family(@ua_string).should == 'GNU/Linux'
|
97
|
+
end
|
98
|
+
|
99
|
+
#os_family_sym
|
100
|
+
it 'returns :linux for os_family_sym' do
|
101
|
+
DoubleAgent.os_family_sym(@ua_string).should == :linux
|
102
|
+
end
|
103
|
+
|
104
|
+
#os_icon
|
105
|
+
it 'returns :ubuntu for os_sym' do
|
106
|
+
DoubleAgent.os_icon(@ua_string).should == :ubuntu
|
107
|
+
end
|
108
|
+
|
109
|
+
#os_family_icon
|
110
|
+
it 'returns :linux for os_family_sym' do
|
111
|
+
DoubleAgent.os_family_icon(@ua_string).should == :linux
|
112
|
+
end
|
99
113
|
end
|
100
114
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rspec'
|
2
|
-
require File.dirname(__FILE__) + '/../lib/double_agent/
|
2
|
+
require File.dirname(__FILE__) + '/../lib/double_agent/parser'
|
3
3
|
require File.dirname(__FILE__) + '/../lib/double_agent/resources'
|
4
4
|
require File.dirname(__FILE__) + '/../lib/double_agent/stats'
|
5
5
|
require File.dirname(__FILE__) + '/../lib/double_agent/logs'
|
@@ -7,3 +7,13 @@ require File.dirname(__FILE__) + '/../lib/double_agent/logs'
|
|
7
7
|
Rspec.configure do |c|
|
8
8
|
c.mock_with :rspec
|
9
9
|
end
|
10
|
+
|
11
|
+
module Kernel
|
12
|
+
def suppress_warnings
|
13
|
+
original_verbosity = $VERBOSE
|
14
|
+
$VERBOSE = nil
|
15
|
+
result = yield
|
16
|
+
$VERBOSE = original_verbosity
|
17
|
+
return result
|
18
|
+
end
|
19
|
+
end
|
data/spec/stats_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/spec_helper'
|
2
2
|
|
3
3
|
log_glob = File.dirname(__FILE__) + '/data/*.access.log*'
|
4
|
-
entries = DoubleAgent::
|
4
|
+
entries = DoubleAgent::Logs::entries(log_glob, :match => /^\d/)
|
5
5
|
|
6
6
|
describe DoubleAgent do
|
7
7
|
context 'Logs' do
|
@@ -10,27 +10,54 @@ describe DoubleAgent do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should have loaded n log entries' do
|
13
|
-
DoubleAgent::
|
13
|
+
DoubleAgent::Logs::entries(log_glob, :match => /^\d/, :ignore => %r{ /dashboard }).size.should == 44
|
14
|
+
end
|
15
|
+
|
16
|
+
context 'without zlib' do
|
17
|
+
it 'should have loaded n log entries' do
|
18
|
+
suppress_warnings { DoubleAgent::Logs::ZLIB = false }
|
19
|
+
plain_entries = DoubleAgent::Logs::entries(log_glob, :match => /^\d/)
|
20
|
+
suppress_warnings { DoubleAgent::Logs::ZLIB = true }
|
21
|
+
plain_entries.size.should == 17
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
context 'parsing other data' do
|
26
|
+
before :each do
|
27
|
+
@line = DoubleAgent::Logs::Entry.new '68.52.99.211 - - [04/May/2011:08:21:04 -0400] "GET / HTTP/1.1" 200 1312 "-" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.17) Gecko/20110420 Firefox/3.6.17"'
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'should parse the IP' do
|
31
|
+
@line.ip.should == '68.52.99.211'
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should parse datestamps' do
|
35
|
+
@line.on.should == Date.new(2011, 5, 4)
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'should parse timestamps' do
|
39
|
+
@line.at.should == DateTime.new(2011, 5, 4, 8, 21, 4, '-0400')
|
40
|
+
end
|
14
41
|
end
|
15
42
|
end
|
16
43
|
|
17
44
|
context 'Stats' do
|
18
45
|
it 'should calculate stats' do
|
19
|
-
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family
|
20
|
-
answer = [["Internet Explorer", "Windows", 42.55],
|
21
|
-
["Chromium", "GNU/Linux", 40.43],
|
22
|
-
["Firefox", "GNU/Linux", 10.64],
|
23
|
-
["Firefox", "OS X", 4.26],
|
24
|
-
["Safari", "OS X", 2.13]]
|
46
|
+
stats = DoubleAgent::Stats.percentages_for entries, :browser_family, :os_family
|
47
|
+
answer = [["Internet Explorer", "Windows", 42.55, 20],
|
48
|
+
["Chromium", "GNU/Linux", 40.43, 19],
|
49
|
+
["Firefox", "GNU/Linux", 10.64, 5],
|
50
|
+
["Firefox", "OS X", 4.26, 2],
|
51
|
+
["Safari", "OS X", 2.13, 1]]
|
25
52
|
stats.should == answer
|
26
53
|
end
|
27
54
|
|
28
55
|
it 'should ignore stats below the threshold' do
|
29
|
-
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
|
30
|
-
answer = [["Internet Explorer", "Windows", 42.55],
|
31
|
-
["Chromium", "GNU/Linux", 40.43],
|
32
|
-
["Firefox", "GNU/Linux", 10.64],
|
33
|
-
["Firefox", "OS X", 4.26]]
|
56
|
+
stats = DoubleAgent::Stats.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
|
57
|
+
answer = [["Internet Explorer", "Windows", 42.55, 20],
|
58
|
+
["Chromium", "GNU/Linux", 40.43, 19],
|
59
|
+
["Firefox", "GNU/Linux", 10.64, 5],
|
60
|
+
["Firefox", "OS X", 4.26, 2]]
|
34
61
|
stats.should == answer
|
35
62
|
end
|
36
63
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 1
|
8
7
|
- 2
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jordan Hollinger
|
@@ -14,11 +14,11 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-07-30 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
21
|
-
description: Browser User Agent string parser with
|
21
|
+
description: Browser User Agent string parser with resources, stats, and a log reader
|
22
22
|
email: jordan@jordanhollinger.com
|
23
23
|
executables: []
|
24
24
|
|
@@ -31,17 +31,17 @@ files:
|
|
31
31
|
- lib/double_agent.rb
|
32
32
|
- lib/double_agent/all.rb
|
33
33
|
- lib/double_agent/logs.rb
|
34
|
-
- lib/double_agent/core.rb
|
35
34
|
- lib/double_agent/resources.rb
|
36
35
|
- lib/double_agent/stats.rb
|
36
|
+
- lib/double_agent/parser.rb
|
37
37
|
- data/browsers.yml
|
38
38
|
- data/oses.yml
|
39
39
|
- spec/spec_helper.rb
|
40
|
-
- spec/core_spec.rb
|
41
40
|
- spec/data/httpd.access.log.1.gz
|
42
41
|
- spec/data/httpd.access.log
|
43
42
|
- spec/stats_spec.rb
|
44
43
|
- spec/resources_spec.rb
|
44
|
+
- spec/data_spec.rb
|
45
45
|
- spec/parser_spec.rb
|
46
46
|
- README.rdoc
|
47
47
|
- LICENSE
|
data/spec/core_spec.rb
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
-
|
3
|
-
describe DoubleAgent do
|
4
|
-
context 'Core' do
|
5
|
-
before do
|
6
|
-
@ua_string = 'Mozilla/5.0 (X11; Ubuntu Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0'
|
7
|
-
end
|
8
|
-
|
9
|
-
#browser
|
10
|
-
it 'returns Firefox 4 for browser' do
|
11
|
-
DoubleAgent.browser(@ua_string).should == 'Firefox 4'
|
12
|
-
end
|
13
|
-
it 'returns Unknown for browser' do
|
14
|
-
DoubleAgent.browser('froofroo').should == 'Unknown'
|
15
|
-
end
|
16
|
-
|
17
|
-
#browser_sym
|
18
|
-
it 'returns :firefox for browser_sym' do
|
19
|
-
DoubleAgent.browser_sym(@ua_string).should == :firefox
|
20
|
-
end
|
21
|
-
it 'returns :unknown for browser_sym' do
|
22
|
-
DoubleAgent.browser_sym('froofroo').should == :unknown
|
23
|
-
end
|
24
|
-
it 'returns :unknown for an empty browser_sym' do
|
25
|
-
DoubleAgent.browser_sym('').should == :unknown
|
26
|
-
end
|
27
|
-
it 'returns :unknown for a nil browser_sym' do
|
28
|
-
DoubleAgent.browser_sym(nil).should == :unknown
|
29
|
-
end
|
30
|
-
|
31
|
-
#browser_family
|
32
|
-
it 'returns Firefox for browser family' do
|
33
|
-
DoubleAgent.browser_family(@ua_string).should == 'Firefox'
|
34
|
-
end
|
35
|
-
|
36
|
-
#browser_family_sym
|
37
|
-
it 'returns :firefox for browser_family_sym' do
|
38
|
-
DoubleAgent.browser_family_sym(@ua_string).should == :firefox
|
39
|
-
end
|
40
|
-
it 'returns :unknown for an empty browser_family_sym' do
|
41
|
-
DoubleAgent.browser_family_sym('').should == :unknown
|
42
|
-
end
|
43
|
-
it 'returns :unknown for a nil browser_family_sym' do
|
44
|
-
DoubleAgent.browser_family_sym(nil).should == :unknown
|
45
|
-
end
|
46
|
-
|
47
|
-
#browser_icon
|
48
|
-
it 'returns :firefox for browser_sym' do
|
49
|
-
DoubleAgent.browser_icon(@ua_string).should == :firefox
|
50
|
-
end
|
51
|
-
it 'returns :unkown for an empty browser_sym' do
|
52
|
-
DoubleAgent.browser_icon('').should == :unknown
|
53
|
-
end
|
54
|
-
it 'returns :unkown for a nil browser_sym' do
|
55
|
-
DoubleAgent.browser_icon(nil).should == :unknown
|
56
|
-
end
|
57
|
-
|
58
|
-
#browser_family_icon
|
59
|
-
it 'returns :firefox for browser_family_sym' do
|
60
|
-
DoubleAgent.browser_family_icon(@ua_string).should == :firefox
|
61
|
-
end
|
62
|
-
it 'returns :unkown for an empty browser_family_sym' do
|
63
|
-
DoubleAgent.browser_family_icon('').should == :unknown
|
64
|
-
end
|
65
|
-
it 'returns :unkown for a nil browser_family_sym' do
|
66
|
-
DoubleAgent.browser_family_icon(nil).should == :unknown
|
67
|
-
end
|
68
|
-
|
69
|
-
#os
|
70
|
-
it 'returns Ubuntua for OS' do
|
71
|
-
DoubleAgent.os(@ua_string).should == 'Ubuntu'
|
72
|
-
end
|
73
|
-
it 'returns Unknowna for OS' do
|
74
|
-
DoubleAgent.os('froofroo').should == 'Unknown'
|
75
|
-
end
|
76
|
-
it 'returns Unknowna for OS' do
|
77
|
-
DoubleAgent.os('').should == 'Unknown'
|
78
|
-
end
|
79
|
-
|
80
|
-
#os_sym
|
81
|
-
it 'returns :ubuntu for os_sym' do
|
82
|
-
DoubleAgent.os_sym(@ua_string).should == :ubuntu
|
83
|
-
end
|
84
|
-
it 'returns :unknown for os_sym' do
|
85
|
-
DoubleAgent.os_sym('froofroo').should == :unknown
|
86
|
-
end
|
87
|
-
it 'returns :unknown for an empty os_sym' do
|
88
|
-
DoubleAgent.os_sym('').should == :unknown
|
89
|
-
end
|
90
|
-
it 'returns :unknown for a nil os_sym' do
|
91
|
-
DoubleAgent.os_sym(nil).should == :unknown
|
92
|
-
end
|
93
|
-
|
94
|
-
#os_family
|
95
|
-
it 'returns GNU/Linux OS family' do
|
96
|
-
DoubleAgent.os_family(@ua_string).should == 'GNU/Linux'
|
97
|
-
end
|
98
|
-
|
99
|
-
#os_family_sym
|
100
|
-
it 'returns :linux for os_family_sym' do
|
101
|
-
DoubleAgent.os_family_sym(@ua_string).should == :linux
|
102
|
-
end
|
103
|
-
|
104
|
-
#os_icon
|
105
|
-
it 'returns :ubuntu for os_sym' do
|
106
|
-
DoubleAgent.os_icon(@ua_string).should == :ubuntu
|
107
|
-
end
|
108
|
-
|
109
|
-
#os_family_icon
|
110
|
-
it 'returns :linux for os_family_sym' do
|
111
|
-
DoubleAgent.os_family_icon(@ua_string).should == :linux
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|