double_agent 0.0.3 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +10 -0
- data/README.rdoc +27 -56
- data/data/oses.yml +37 -37
- data/lib/double_agent/logs.rb +12 -12
- data/lib/double_agent/stats.rb +9 -4
- data/spec/stats_spec.rb +23 -8
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
== Release 0.1.1 (May 10, 2011)
|
2
|
+
|
3
|
+
* #log_entries takes :match and :ignore regexp options
|
4
|
+
|
5
|
+
* Added :threshold option to #percentages_for
|
6
|
+
|
7
|
+
* Massive performance improvements to log parsing
|
8
|
+
|
9
|
+
* Minor performance improvements to user agent parsing
|
10
|
+
|
1
11
|
== Release 0.0.3 (May 6, 2011)
|
2
12
|
|
3
13
|
* Fixed bug from 0.0.2 in Ruby 1.8 where browser versions never got parsed
|
data/README.rdoc
CHANGED
@@ -3,35 +3,32 @@
|
|
3
3
|
double_agent is a library for parsing browser and operating system info out of user
|
4
4
|
agent strings. It is designed for parsing large sets for review or analysis.
|
5
5
|
|
6
|
-
|
6
|
+
== Installation
|
7
|
+
|
8
|
+
gem install double_agent
|
9
|
+
|
10
|
+
== Loading
|
11
|
+
|
12
|
+
Double Agent is broken up into four components - core, resources, stats, logs
|
13
|
+
|
14
|
+
# Load the default components (core, resources and stats)
|
7
15
|
require 'double_agent'
|
8
16
|
|
9
|
-
# Load
|
10
|
-
require 'double_agent/core'
|
11
|
-
require 'double_agent/resources'
|
12
|
-
require 'double_agent/stats'
|
13
|
-
require 'double_agent/logs'
|
17
|
+
# Load components individually (some have dependencies on others)
|
18
|
+
require 'double_agent/core|resources|stats|logs'
|
14
19
|
|
15
|
-
# Load everything
|
20
|
+
# Load everything
|
16
21
|
require 'double_agent/all'
|
17
22
|
|
18
|
-
= Core
|
19
|
-
|
20
|
-
The core parser.
|
23
|
+
= The Core Parser
|
21
24
|
|
22
|
-
ua_string = "pretent I'm a user agent string for
|
25
|
+
ua_string = "pretend I'm a user agent string for Chrome on Ubuntu"
|
23
26
|
|
24
27
|
DoubleAgent.browser(ua_string)
|
25
|
-
=> "
|
28
|
+
=> "Chrome 11"
|
26
29
|
|
27
30
|
DoubleAgent.browser_family(ua_string)
|
28
|
-
=> "
|
29
|
-
|
30
|
-
DoubleAgent.browser_sym(ua_string)
|
31
|
-
=> :firefox
|
32
|
-
|
33
|
-
DoubleAgent.browser_family_sym(ua_string)
|
34
|
-
=> :firefox
|
31
|
+
=> "Chromium"
|
35
32
|
|
36
33
|
DoubleAgent.os(ua_string)
|
37
34
|
=> "Ubuntu"
|
@@ -39,22 +36,18 @@ The core parser.
|
|
39
36
|
DoubleAgent.os_family(ua_string)
|
40
37
|
=> "GNU/Linux"
|
41
38
|
|
42
|
-
|
43
|
-
=> :ubuntu
|
44
|
-
|
45
|
-
DoubleAgent.os_family_sym(ua_string)
|
46
|
-
=> :linux
|
39
|
+
See the DoubleAgent module for more.
|
47
40
|
|
48
41
|
= Resources
|
49
42
|
|
50
|
-
|
51
|
-
|
43
|
+
DoubleAgent::Resource is a mix-in for objects with a user_agent method or attribute.
|
44
|
+
It gives that class's objects all of the above methods.
|
52
45
|
|
53
46
|
class Login
|
54
47
|
include DoubleAgent::Resource
|
55
48
|
|
56
49
|
def user_agent
|
57
|
-
#returns
|
50
|
+
#returns the user agent string the user logged in with
|
58
51
|
end
|
59
52
|
end
|
60
53
|
|
@@ -68,21 +61,7 @@ the user agent is available through user_agent, and presto!
|
|
68
61
|
|
69
62
|
= Stats
|
70
63
|
|
71
|
-
|
72
|
-
|
73
|
-
== Example 1
|
74
|
-
|
75
|
-
logins = Login.all
|
76
|
-
stats = DoubleAgent.percentages_for(logins, :browser)
|
77
|
-
|
78
|
-
p stats
|
79
|
-
=> [["Firefox 4", 20.0], ["Internet Explorer 8", 18.0], ...]
|
80
|
-
|
81
|
-
stats.each do |browser, percent|
|
82
|
-
puts "#{browser} - #{percent}%"
|
83
|
-
end
|
84
|
-
|
85
|
-
== Example 2
|
64
|
+
Calculate browser and OS shares for large sets of DoubleAgent::Resource objects with DoubleAgent::percentages_for.
|
86
65
|
|
87
66
|
logins = Login.all
|
88
67
|
stats = DoubleAgent.percentages_for(logins, :browser_family, :os_family)
|
@@ -96,23 +75,15 @@ Figure out what percent use which browser, browser family, os, etc.
|
|
96
75
|
|
97
76
|
= Logs
|
98
77
|
|
99
|
-
DoubleAgent
|
100
|
-
object. It even reads gzipped logs (requires zlib)!
|
101
|
-
|
78
|
+
DoubleAgent::log_entries parses through Apache and Nginx access logs, instantiating each log line into a DoubleAgent::LogEntry
|
79
|
+
object. It even reads gzipped logs (requires zlib)! Since the DoubleAgent::LogEntry class mixes in DoubleAgent::Resource, you
|
80
|
+
can easily calculate browser and/or OS market share on your site.
|
102
81
|
|
103
82
|
require 'double_agent'
|
104
83
|
require 'double_agent/logs'
|
105
84
|
|
106
85
|
entries = DoubleAgent.log_entries("/var/log/nginx/my-site.access.log*")
|
107
|
-
entries
|
108
|
-
puts entry.browser
|
109
|
-
end
|
86
|
+
stats = DoubleAgent.percentages_for(entries, :browser)
|
110
87
|
|
111
|
-
|
112
|
-
Internet Explorer 8
|
113
|
-
Internet Explorer 9
|
114
|
-
Firefox 4
|
115
|
-
Internet Explorer
|
116
|
-
Safari
|
117
|
-
Chrome
|
118
|
-
...
|
88
|
+
p stats
|
89
|
+
=> [["Firefox 4", 20.0], ["Internet Explorer 8", 18.0], ...]
|
data/data/oses.yml
CHANGED
@@ -1,40 +1,3 @@
|
|
1
|
-
- :name: Android
|
2
|
-
:sym: :android
|
3
|
-
:family_sym: :linux
|
4
|
-
:regex: android
|
5
|
-
|
6
|
-
- :name: Ubuntu
|
7
|
-
:sym: :ubuntu
|
8
|
-
:family_sym: :linux
|
9
|
-
:regex: ubuntu
|
10
|
-
|
11
|
-
- :name: Fedora
|
12
|
-
:sym: :fedora
|
13
|
-
:family_sym: :linux
|
14
|
-
:regex: fedora
|
15
|
-
|
16
|
-
- :name: Slackware
|
17
|
-
:sym: :slackware
|
18
|
-
:family_sym: :linux
|
19
|
-
:regex: slackware
|
20
|
-
|
21
|
-
- :name: GNU/Linux
|
22
|
-
:sym: :linux
|
23
|
-
:regex: linux
|
24
|
-
|
25
|
-
- :name: FreeBSD
|
26
|
-
:sym: :freebsd
|
27
|
-
:regex: freebsd
|
28
|
-
|
29
|
-
- :name: iOS
|
30
|
-
:sym: :ios
|
31
|
-
:family_sym: :osx
|
32
|
-
:regex: (iphone)|(ipad)
|
33
|
-
|
34
|
-
- :name: OS X
|
35
|
-
:sym: :osx
|
36
|
-
:regex: macintosh
|
37
|
-
|
38
1
|
- :name: Windows 8
|
39
2
|
:sym: :windows_8
|
40
3
|
:family_sym: :windows
|
@@ -63,6 +26,43 @@
|
|
63
26
|
:sym: :windows
|
64
27
|
:regex: windows
|
65
28
|
|
29
|
+
- :name: iOS
|
30
|
+
:sym: :ios
|
31
|
+
:family_sym: :osx
|
32
|
+
:regex: (iphone)|(ipad)
|
33
|
+
|
34
|
+
- :name: OS X
|
35
|
+
:sym: :osx
|
36
|
+
:regex: macintosh
|
37
|
+
|
38
|
+
- :name: Android
|
39
|
+
:sym: :android
|
40
|
+
:family_sym: :linux
|
41
|
+
:regex: android
|
42
|
+
|
43
|
+
- :name: Ubuntu
|
44
|
+
:sym: :ubuntu
|
45
|
+
:family_sym: :linux
|
46
|
+
:regex: ubuntu
|
47
|
+
|
48
|
+
- :name: Fedora
|
49
|
+
:sym: :fedora
|
50
|
+
:family_sym: :linux
|
51
|
+
:regex: fedora
|
52
|
+
|
53
|
+
- :name: Slackware
|
54
|
+
:sym: :slackware
|
55
|
+
:family_sym: :linux
|
56
|
+
:regex: slackware
|
57
|
+
|
58
|
+
- :name: GNU/Linux
|
59
|
+
:sym: :linux
|
60
|
+
:regex: linux
|
61
|
+
|
62
|
+
- :name: FreeBSD
|
63
|
+
:sym: :freebsd
|
64
|
+
:regex: freebsd
|
65
|
+
|
66
66
|
- :name: BlackBerry
|
67
67
|
:sym: :blackberry
|
68
68
|
:regex: blackberry
|
data/lib/double_agent/logs.rb
CHANGED
@@ -4,16 +4,18 @@ module DoubleAgent
|
|
4
4
|
# Accepts a glob path like /var/logs/apache/my-site.access.log*,
|
5
5
|
# parses all matching files into an array of LogEntry objects, and returns them.
|
6
6
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
def self.log_entries(glob_str,
|
10
|
-
|
7
|
+
# options[:match] and options[:ignore] can each take a regular expression,
|
8
|
+
# ignoring lines that don't and do match, respectively (:match keeps only matching lines; :ignore skips matching lines).
|
9
|
+
def self.log_entries(glob_str, options={})
|
10
|
+
gz_regexp = /\.gz\Z/i
|
11
|
+
entries = []
|
12
|
+
parse = (options[:match] or options[:ignore]) \
|
13
|
+
? lambda { |line| entries << LogEntry.new(line) if (options[:match].nil? or line =~ options[:match]) and (options[:ignore].nil? or line !~ options[:ignore]) } \
|
14
|
+
: lambda { |line| entries << LogEntry.new(line) }
|
11
15
|
Dir.glob(glob_str).each do |f|
|
12
|
-
File.open(f) do |file|
|
16
|
+
File.open(f, 'r') do |file|
|
13
17
|
handle = f =~ gz_regexp ? Zlib::GzipReader.new(file) : file
|
14
|
-
|
15
|
-
entries << LogEntry.new(line) if regex.nil? or line =~ regex
|
16
|
-
end
|
18
|
+
handle.readlines.each &parse
|
17
19
|
end
|
18
20
|
end
|
19
21
|
entries
|
@@ -25,16 +27,14 @@ module DoubleAgent
|
|
25
27
|
|
26
28
|
class LogEntry
|
27
29
|
# Regular expression for pulling a user agent string out of a log entry
|
28
|
-
USER_AGENT_REGEXP = /
|
29
|
-
include DoubleAgent::Resource
|
30
|
-
|
30
|
+
USER_AGENT_REGEXP = /" ".+$/
|
31
|
+
include DoubleAgent::Resource
|
31
32
|
# Returns the user agent string
|
32
33
|
attr_reader :user_agent
|
33
34
|
|
34
35
|
# Initializes a new LogEntry object. An Apache or Nginx log line should be
|
35
36
|
# passed to it.
|
36
37
|
def initialize(line)
|
37
|
-
#@line = line
|
38
38
|
@user_agent = line.slice(USER_AGENT_REGEXP)
|
39
39
|
end
|
40
40
|
end
|
data/lib/double_agent/stats.rb
CHANGED
@@ -15,7 +15,10 @@ module DoubleAgent
|
|
15
15
|
#
|
16
16
|
# "things" is an array of objects whose classes "include DoubleAgent::Resource".
|
17
17
|
#
|
18
|
-
# "
|
18
|
+
# "args" is one or more method symbols from DoubleAgent::Resource.
|
19
|
+
#
|
20
|
+
# "args" may have, as its last member, :threshold => n, where n is the lowest
|
21
|
+
# percentage you want returned.
|
19
22
|
#
|
20
23
|
# Example, Browser Family share:
|
21
24
|
# DoubleAgent.percentages_for(logins, :browser_family)
|
@@ -24,11 +27,12 @@ module DoubleAgent
|
|
24
27
|
# Example, Browser/OS share, asking for symbols back:
|
25
28
|
# DoubleAgent.percentages_for(server_log_entries, :browser_sym, :os_sym)
|
26
29
|
# [[:firefox, :windows_7, 50.4], [:chrome, :osx, 19.6], [:msie, :windows_xp, 15], [:safari, :osx, 10], [:other, :other, 5]]
|
27
|
-
def self.percentages_for(things, *
|
30
|
+
def self.percentages_for(things, *args)
|
31
|
+
options = args.last.is_a?(Hash) ? args.pop : {} # Break out options
|
28
32
|
p = {}
|
29
33
|
things.each do |h|
|
30
|
-
syms =
|
31
|
-
p[syms]
|
34
|
+
syms = args.map { |attr| h.send attr }
|
35
|
+
p[syms] ||= 0
|
32
36
|
p[syms] += 1
|
33
37
|
end
|
34
38
|
size = things.size.to_f
|
@@ -39,6 +43,7 @@ module DoubleAgent
|
|
39
43
|
p.collect! { |k,n| [*k.<<(((n * 100) / size).round(2))] }
|
40
44
|
end
|
41
45
|
p.sort! { |a,b| b.last <=> a.last }
|
46
|
+
p.reject! { |a| a.last < options[:threshold] } if options[:threshold]
|
42
47
|
p
|
43
48
|
end
|
44
49
|
end
|
data/spec/stats_spec.rb
CHANGED
@@ -1,22 +1,37 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/spec_helper'
|
2
2
|
|
3
3
|
log_glob = File.dirname(__FILE__) + '/data/*.access.log*'
|
4
|
-
entries = DoubleAgent::log_entries(log_glob, /^\d/)
|
4
|
+
entries = DoubleAgent::log_entries(log_glob, :match => /^\d/)
|
5
5
|
|
6
6
|
describe DoubleAgent do
|
7
7
|
context 'Logs' do
|
8
8
|
it 'should have loaded n log entries' do
|
9
9
|
entries.size.should == 47
|
10
10
|
end
|
11
|
+
|
12
|
+
it 'should have loaded n log entries' do
|
13
|
+
DoubleAgent::log_entries(log_glob, :match => /^\d/, :ignore => %r{ /dashboard }).size.should == 44
|
14
|
+
end
|
11
15
|
end
|
12
16
|
|
13
17
|
context 'Stats' do
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
it 'should calculate stats' do
|
19
|
+
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family
|
20
|
+
answer = [["Internet Explorer", "Windows", 42.55],
|
21
|
+
["Chromium", "GNU/Linux", 40.43],
|
22
|
+
["Firefox", "GNU/Linux", 10.64],
|
23
|
+
["Firefox", "OS X", 4.26],
|
24
|
+
["Safari", "OS X", 2.13]]
|
25
|
+
stats.should == answer
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'should ignore stats below the threshold' do
|
29
|
+
stats = DoubleAgent.percentages_for entries, :browser_family, :os_family, :threshold => 3.0
|
30
|
+
answer = [["Internet Explorer", "Windows", 42.55],
|
31
|
+
["Chromium", "GNU/Linux", 40.43],
|
32
|
+
["Firefox", "GNU/Linux", 10.64],
|
33
|
+
["Firefox", "OS X", 4.26]]
|
34
|
+
stats.should == answer
|
35
|
+
end
|
21
36
|
end
|
22
37
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: double_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.
|
5
|
+
version: 0.1.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jordan Hollinger
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-05-10 00:00:00 Z
|
14
14
|
dependencies: []
|
15
15
|
|
16
16
|
description: Browser User Agent string parser with resource, stats, and a log reader
|