double_agent 0.0.3 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +10 -0
- data/README.rdoc +27 -56
- data/data/oses.yml +37 -37
- data/lib/double_agent/logs.rb +12 -12
- data/lib/double_agent/stats.rb +9 -4
- data/spec/stats_spec.rb +23 -8
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
== Release 0.1.1 (May 10, 2011)
|
2
|
+
|
3
|
+
* #log_entries takes :match and :ignore regexp options
|
4
|
+
|
5
|
+
* Added :threshold option to #percentages_for
|
6
|
+
|
7
|
+
* Massive performance improvements to log parsing
|
8
|
+
|
9
|
+
* Minor performance improvements to user agent parsing
|
10
|
+
|
1
11
|
== Release 0.0.3 (May 6, 2011)
|
2
12
|
|
3
13
|
* Fixed bug from 0.0.2 in Ruby 1.8 where browser versions never got parsed
|
data/README.rdoc
CHANGED
@@ -3,35 +3,32 @@
|
|
3
3
|
double_agent is a library for parsing browser and operating system info out of user
|
4
4
|
agent strings. It is designed for parsing large sets for review or analysis.
|
5
5
|
|
6
|
-
|
6
|
+
== Installation
|
7
|
+
|
8
|
+
gem install double_agent
|
9
|
+
|
10
|
+
== Loading
|
11
|
+
|
12
|
+
Double Agent is broken up into four components - core, resources, stats, logs
|
13
|
+
|
14
|
+
# Load the default components (core, resources and stats)
|
7
15
|
require 'double_agent'
|
8
16
|
|
9
|
-
# Load
|
10
|
-
require 'double_agent/core'
|
11
|
-
require 'double_agent/resources'
|
12
|
-
require 'double_agent/stats'
|
13
|
-
require 'double_agent/logs'
|
17
|
+
# Load components individually (some have dependencies on others)
|
18
|
+
require 'double_agent/core|resources|stats|logs'
|
14
19
|
|
15
|
-
# Load everything
|
20
|
+
# Load everything
|
16
21
|
require 'double_agent/all'
|
17
22
|
|
18
|
-
= Core
|
19
|
-
|
20
|
-
The core parser.
|
23
|
+
= The Core Parser
|
21
24
|
|
22
|
-
ua_string = "pretent I'm a user agent string for
|
25
|
+
ua_string = "pretend I'm a user agent string for Chrome on Ubuntu"
|
23
26
|
|
24
27
|
DoubleAgent.browser(ua_string)
|
25
|
-
=> "
|
28
|
+
=> "Chrome 11"
|
26
29
|
|
27
30
|
DoubleAgent.browser_family(ua_string)
|
28
|
-
=> "
|
29
|
-
|
30
|
-
DoubleAgent.browser_sym(ua_string)
|
31
|
-
=> :firefox
|
32
|
-
|
33
|
-
DoubleAgent.browser_family_sym(ua_string)
|
34
|
-
=> :firefox
|
31
|
+
=> "Chromium"
|
35
32
|
|
36
33
|
DoubleAgent.os(ua_string)
|
37
34
|
=> "Ubuntu"
|
@@ -39,22 +36,18 @@ The core parser.
|
|
39
36
|
DoubleAgent.os_family(ua_string)
|
40
37
|
=> "GNU/Linux"
|
41
38
|
|
42
|
-
|
43
|
-
=> :ubuntu
|
44
|
-
|
45
|
-
DoubleAgent.os_family_sym(ua_string)
|
46
|
-
=> :linux
|
39
|
+
See the DoubleAgent module for more.
|
47
40
|
|
48
41
|
= Resources
|
49
42
|
|
50
|
-
|
51
|
-
|
43
|
+
DoubleAgent::Resource is a mix-in for objects with a user_agent method or attribute.
|
44
|
+
It gives that class's objects all of the above methods.
|
52
45
|
|
53
46
|
class Login
|
54
47
|
include DoubleAgent::Resource
|
55
48
|
|
56
49
|
def user_agent
|
57
|
-
#returns
|
50
|
+
#returns the user agent string the user logged in with
|
58
51
|
end
|
59
52
|
end
|
60
53
|
|
@@ -68,21 +61,7 @@ the user agent is available through user_agent, and presto!
|
|
68
61
|
|
69
62
|
= Stats
|
70
63
|
|
71
|
-
|
72
|
-
|
73
|
-
== Example 1
|
74
|
-
|
75
|
-
logins = Login.all
|
76
|
-
stats = DoubleAgent.percentages_for(logins, :browser)
|
77
|
-
|
78
|
-
p stats
|
79
|
-
=> [["Firefox 4", 20.0], ["Internet Explorer 8", 18.0], ...]
|
80
|
-
|
81
|
-
stats.each do |browser, percent|
|
82
|
-
puts "#{browser} - #{percent}%"
|
83
|
-
end
|
84
|
-
|
85
|
-
== Example 2
|
64
|
+
Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::percentages_for.
|
86
65
|
|
87
66
|
logins = Login.all
|
88
67
|
stats = DoubleAgent.percentages_for(logins, :browser_family, :os_family)
|
@@ -96,23 +75,15 @@ Figure out what percent use which browser, browser family, os, etc.
|
|
96
75
|
|
97
76
|
= Logs
|
98
77
|
|
99
|
-
DoubleAgent
|
100
|
-
object. It even reads gzipped logs (requires zlib)!
|
101
|
-
|
78
|
+
DoubleAgent::log_entries parses through Apache and Nginx access logs, instantiating each log line into a DoubleAgent::LogEntry
|
79
|
+
object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::LogEntry class mixes in DoubleAgent::Resource, you
|
80
|
+
can easily calculate browser and/or OS market share on your site.
|
102
81
|
|
103
82
|
require 'double_agent'
|
104
83
|
require 'double_agent/logs'
|
105
84
|
|
106
85
|
entries = DoubleAgent.log_entries("/var/log/nginx/my-site.access.log*")
|
107
|
-
entries
|
108
|
-
puts entry.browser
|
109
|
-
end
|
86
|
+
stats = DoubleAgent.percentages_for(entries, :browser)
|
110
87
|
|
111
|
-
|
112
|
-
Internet Explorer 8
|
113
|
-
Internet Explorer 9
|
114
|
-
Firefox 4
|
115
|
-
Internet Explorer
|
116
|
-
Safari
|
117
|
-
Chrome
|
118
|
-
...
|
88
|
+
p stats
|
89
|
+
=> [["Firefox 4", 20.0], ["Internet Explorer 8", 18.0], ...]
|
data/data/oses.yml
CHANGED
@@ -1,40 +1,3 @@
|
|
1
|
-
- :name: Android
|
2
|
-
:sym: :android
|
3
|
-
:family_sym: :linux
|
4
|
-
:regex: android
|
5
|
-
|
6
|
-
- :name: Ubuntu
|
7
|
-
:sym: :ubuntu
|
8
|
-
:family_sym: :linux
|
9
|
-
:regex: ubuntu
|
10
|
-
|
11
|
-
- :name: Fedora
|
12
|
-
:sym: :fedora
|
13
|
-
:family_sym: :linux
|
14
|
-
:regex: fedora
|
15
|
-
|
16
|
-
- :name: Slackware
|
17
|
-
:sym: :slackware
|
18
|
-
:family_sym: :linux
|
19
|
-
:regex: slackware
|
20
|
-
|
21
|
-
- :name: GNU/Linux
|
22
|
-
:sym: :linux
|
23
|
-
:regex: linux
|
24
|
-
|
25
|
-
- :name: FreeBSD
|
26
|
-
:sym: :freebsd
|
27
|
-
:regex: freebsd
|
28
|
-
|
29
|
-
- :name: iOS
|
30
|
-
:sym: :ios
|
31
|
-
:family_sym: :osx
|
32
|
-
:regex: (iphone)|(ipad)
|
33
|
-
|
34
|
-
- :name: OS X
|
35
|
-
:sym: :osx
|
36
|
-
:regex: macintosh
|
37
|
-
|
38
1
|
- :name: Windows 8
|
39
2
|
:sym: :windows_8
|
40
3
|
:family_sym: :windows
|
@@ -63,6 +26,43 @@
|
|
63
26
|
:sym: :windows
|
64
27
|
:regex: windows
|
65
28
|
|
29
|
+
- :name: iOS
|
30
|
+
:sym: :ios
|
31
|
+
:family_sym: :osx
|
32
|
+
:regex: (iphone)|(ipad)
|
33
|
+
|
34
|
+
- :name: OS X
|
35
|
+
:sym: :osx
|
36
|
+
:regex: macintosh
|
37
|
+
|
38
|
+
- :name: Android
|
39
|
+
:sym: :android
|
40
|
+
:family_sym: :linux
|
41
|
+
:regex: android
|
42
|
+
|
43
|
+
- :name: Ubuntu
|
44
|
+
:sym: :ubuntu
|
45
|
+
:family_sym: :linux
|
46
|
+
:regex: ubuntu
|
47
|
+
|
48
|
+
- :name: Fedora
|
49
|
+
:sym: :fedora
|
50
|
+
:family_sym: :linux
|
51
|
+
:regex: fedora
|
52
|
+
|
53
|
+
- :name: Slackware
|
54
|
+
:sym: :slackware
|
55
|
+
:family_sym: :linux
|
56
|
+
:regex: slackware
|
57
|
+
|
58
|
+
- :name: GNU/Linux
|
59
|
+
:sym: :linux
|
60
|
+
:regex: linux
|
61
|
+
|
62
|
+
- :name: FreeBSD
|
63
|
+
:sym: :freebsd
|
64
|
+
:regex: freebsd
|
65
|
+
|
66
66
|
- :name: BlackBerry
|
67
67
|
:sym: :blackberry
|
68
68
|
:regex: blackberry
|
data/lib/double_agent/logs.rb
CHANGED
@@ -4,16 +4,18 @@ module DoubleAgent
|
|
4
4
|
# Accepts a glob path like /var/logs/apache/my-site.access.log*,
|
5
5
|
# parses all matching files into an array of LogEntry objects, and returns them.
|
6
6
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
def self.log_entries(glob_str,
|
10
|
-
|
7
|
+
# options[:match] and options[:ignore] can each take a regular expression,
|
8
|
+
# skipping lines that don't match :match and lines that do match :ignore.
|
9
|
+
def self.log_entries(glob_str, options={})
|
10
|
+
gz_regexp = /\.gz\Z/i
|
11
|
+
entries = []
|
12
|
+
parse = (options[:match] or options[:ignore]) \
|
13
|
+
? lambda { |line| entries << LogEntry.new(line) if (options[:match].nil? or line =~ options[:match]) and (options[:ignore].nil? or line !~ options[:ignore]) } \
|
14
|
+
: lambda { |line| entries << LogEntry.new(line) }
|
11
15
|
Dir.glob(glob_str).each do |f|
|
12
|
-
File.open(f) do |file|
|
16
|
+
File.open(f, 'r') do |file|
|
13
17
|
handle = f =~ gz_regexp ? Zlib::GzipReader.new(file) : file
|
14
|
-
|
15
|
-
entries << LogEntry.new(line) if regex.nil? or line =~ regex
|
16
|
-
end
|
18
|
+
handle.readlines.each &parse
|
17
19
|
end
|
18
20
|
end
|
19
21
|
entries
|
@@ -25,16 +27,14 @@ module DoubleAgent
|
|
25
27
|
|
26
28
|
class LogEntry
|
27
29
|
# Regular expression for pulling a user agent string out of a log entry
|
28
|
-
USER_AGENT_REGEXP = /
|
29
|
-
include DoubleAgent::Resource
|
30
|
-
|
30
|
+
USER_AGENT_REGEXP = /" ".+$/
|
31
|
+
include DoubleAgent::Resource
|
31
32
|
# Returns the user agent string
|
32
33
|
attr_reader :user_agent
|
33
34
|
|
34
35
|
# Initializes a new LogEntry object. An Apache or Nginx log line should be
|
35
36
|
# passed to it.
|
36
37
|
def initialize(line)
|
37
|
-
#@line = line
|
38
38
|
@user_agent = line.slice(USER_AGENT_REGEXP)
|
39
39
|
end
|
40
40
|
end
|
data/lib/double_agent/stats.rb
CHANGED
@@ -15,7 +15,10 @@ module DoubleAgent
|
|
15
15
|
#
|
16
16
|
# "things" is an array of objects whose classes "include DoubleAgent::Resource".
|
17
17
|
#
|
18
|
-
# "
|
18
|
+
# "args" is one or more method symbols from DoubleAgent::Resource.
|
19
|
+
#
|
20
|
+
# "args" may have, as its last member, :threshold => n, where n is the lowest
|
21
|
+
# percentage you want returned.
|
19
22
|
#
|
20
23
|
# Example, Browser Family share:
|
21
24
|
# DoubleAgent.percentages_for(logins, :browser_family)
|
@@ -24,11 +27,12 @@ module DoubleAgent
|
|
24
27
|
# Example, Browser/OS share, asking for symbols back:
|
25
28
|
# DoubleAgent.percentages_for(server_log_entries, :browser_sym, :os_sym)
|
26
29
|
# [[:firefox, :windows_7, 50.4], [:chrome, :osx, 19.6], [:msie, :windows_xp, 15], [:safari, :osx, 10], [:other, :other, 5]]
|
27
|
-
def self.percentages_for(things, *
|
30
|
+
def self.percentages_for(things, *args)
|
31
|
+
options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
|
28
32
|
p = {}
|
29
33
|
things.each do |h|
|
30
|
-
syms =
|
31
|
-
p[syms]
|
34
|
+
syms = args.map { |attr| h.send attr }
|
35
|
+
p[syms] ||= 0
|
32
36
|
p[syms] += 1
|
33
37
|
end
|
34
38
|
size = things.size.to_f
|
@@ -39,6 +43,7 @@ module DoubleAgent
|
|
39
43
|
p.collect! { |k,n| [*k.<<(((n * 100) / size).round(2))] }
|
40
44
|
end
|
41
45
|
p.sort! { |a,b| b.last <=> a.last }
|
46
|
+
p.reject! { |a| a.last < options[:threshold] } if options[:threshold]
|
42
47
|
p
|
43
48
|
end
|
44
49
|
end
|
data/spec/stats_spec.rb
CHANGED
@@ -1,22 +1,37 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/spec_helper'
|
2
2
|
|
3
3
|
log_glob = File.dirname(__FILE__) + '/data/*.access.log*'
|
4
|
-
entries = DoubleAgent::log_entries(log_glob, /^\d/)
|
4
|
+
entries = DoubleAgent::log_entries(log_glob, :match => /^\d/)
|
5
5
|
|
6
6
|
describe DoubleAgent do
|
7
7
|
context 'Logs' do
|
8
8
|
it 'should have loaded n log entries' do
|
9
9
|
entries.size.should == 47
|
10
10
|
end
|
11
|
+
|
12
|
+
it 'should have loaded n log entries' do
|
13
|
+
DoubleAgent::log_entries(log_glob, :match => /^\d/, :ignore => %r{ /dashboard }).size.should == 44
|
14
|
+
end
|
11
15
|
end
|
12
16
|
|
13
17
|
context 'Stats' do
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
it 'should calculate stats' do
|
19
|
+
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family
|
20
|
+
answer = [["Internet Explorer", "Windows", 42.55],
|
21
|
+
["Chromium", "GNU/Linux", 40.43],
|
22
|
+
["Firefox", "GNU/Linux", 10.64],
|
23
|
+
["Firefox", "OS X", 4.26],
|
24
|
+
["Safari", "OS X", 2.13]]
|
25
|
+
stats.should == answer
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'should ignore stats below the threshold' do
|
29
|
+
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
|
30
|
+
answer = [["Internet Explorer", "Windows", 42.55],
|
31
|
+
["Chromium", "GNU/Linux", 40.43],
|
32
|
+
["Firefox", "GNU/Linux", 10.64],
|
33
|
+
["Firefox", "OS X", 4.26]]
|
34
|
+
stats.should == answer
|
35
|
+
end
|
21
36
|
end
|
22
37
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: double_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.
|
5
|
+
version: 0.1.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jordan Hollinger
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-05-10 00:00:00 Z
|
14
14
|
dependencies: []
|
15
15
|
|
16
16
|
description: Browser User Agent string parser with resource, stats, and a log reader
|