special_agent 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ require "special_agent"
2
+ require 'rexml/document'
3
+
4
# Rake task: reports how SpecialAgent::UserAgent#is_bot? classifies known bot
# and browser user-agent strings.
#
# A short hard-coded list of each kind is extended with every <String> found
# under "user-agents/user-agent" in lib/tasks/allagents.xml (the path is
# relative to the directory rake is run from). An entry is treated as a bot
# when one of its <Type> elements is exactly "R" or "S", and as a browser when
# one is exactly "B"; an entry with both kinds of <Type> lands in both lists.
#
# Output: pass/fail counts for bot detection, then for the false-positive
# check, each printed next to a hard-coded BENCHMARK figure for comparison.
task :test_bots do
  # Strings that are expected to be flagged as bots.
  bot_user_agent_strings = [
    "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)",
    "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
  ]
  # Strings that are expected NOT to be flagged as bots.
  browser_user_agent_strings = [
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; sv-se) AppleWebKit/525.26.2 (KHTML, like Gecko) Version/3.2 Safari/525.26.12",
    "Mozilla/5.0 (iPhone; U; Linux i686; pt-br) AppleWebKit/532+ (KHTML, like Gecko) Version/3.0 Mobile/1A538b Safari/419.3 Midori/0.2.0",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_1 like Mac OS X; zh-tw) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8G4 Safari/6533.18.5",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-us) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0",
    "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11"
  ]

  puts
  puts "parsing the user-agents.org list..."
  # Block form of File.open closes the handle once the document is built;
  # previously the handle was opened inline and never closed.
  doc = File.open("lib/tasks/allagents.xml") { |file| REXML::Document.new(file) }
  doc.elements.each("user-agents/user-agent") do |ua|
    types = []
    ua.elements.each("Type") { |item| types << item.text }
    bot = types.include?("R") || types.include?("S")
    browser = types.include?("B")
    next unless bot || browser

    ua.elements.each("String") do |item|
      bot_user_agent_strings << item.text if bot
      browser_user_agent_strings << item.text if browser
    end
  end

  puts "testing..."
  # Splits a list into [flagged_as_bot, not_flagged] according to is_bot?.
  classify = lambda do |ua_strings|
    ua_strings.partition { |ua_str| SpecialAgent::UserAgent.new(ua_str).is_bot? }
  end

  # Known bots: being flagged is the success case.
  flagged, unflagged = classify.call(bot_user_agent_strings)
  puts "BOT DETECTION"
  puts "#{flagged.size} succeed. BENCHMARK: 376"
  puts "#{unflagged.size} fail. BENCHMARK: 1083"

  # Known browsers: NOT being flagged is the success case.
  flagged, unflagged = classify.call(browser_user_agent_strings)
  puts "FALSE POSITIVES CHECK"
  puts "#{unflagged.size} succeed. BENCHMARK: 326"
  puts "#{flagged.size} fail. BENCHMARK: 4"
end
@@ -0,0 +1,16 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+
11
+ require File.join(File.dirname(__FILE__), "user_agent_matcher")
12
+
13
+
14
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
15
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
16
+ require 'special_agent'
@@ -0,0 +1,16 @@
1
# Reopens RSpec's example-group class to add a class-level `detect` macro for
# user-agent specs.
class RSpec::Core::ExampleGroup
  class << self
    # Defines one example, named after +user_agent+, that instantiates the
    # group's described class with that string and passes the instance to the
    # given block.
    #
    # user_agent - String handed to the described class's constructor; it is
    #              also used as the example's description.
    # block      - called with the constructed object; expectations go here.
    #
    # Raises ArgumentError at definition time when no block is given, instead
    # of failing later when the example runs.
    def detect(user_agent, &block)
      raise ArgumentError, "detect requires a block" unless block

      # NOTE(review): `describes` is what this helper originally called; it
      # appears to be missing from newer RSpec releases, so `described_class`
      # is tried first - confirm against the RSpec version in use.
      klass = respond_to?(:described_class) ? described_class : describes

      it(user_agent) do
        # Built inside the example so that a constructor error fails only this
        # example rather than aborting the load of the whole spec file.
        block.call(klass.new(user_agent))
      end
    end
  end
end
15
+
16
+
@@ -0,0 +1,85 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Parsing expectations for SpecialAgent::UserAgent. Each `detect` call defines
# one example named after the user-agent string and yields the parsed object.
describe SpecialAgent::UserAgent do

  describe "Safari" do
    detect "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should be_nil

      ua.device.operating_system.name.should == "Mac OS X"
      ua.device.operating_system.version.to_s.should == "10.6.8"

      ua.device.platform.name.should == "Macintosh"
      ua.device.platform.version.should be_nil

      ua.device.engine.name.should == "AppleWebKit"
      ua.device.engine.version.to_s.should == "533.21.1"

      ua.device.engine.browser.name.should == "Safari"
      ua.device.engine.browser.version.to_s.should == "533.21.1"
    end

    # This string carries a Chrome/16.0.891.1 token but is asserted as Safari
    # here, the same situation the TODO in the "Chrome" group describes.
    detect "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) AppleWebKit/535.5 (KHTML, like Gecko) Chrome/16.0.891.1 Safari/535.5" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should be_nil

      ua.device.operating_system.name.should == "Mac OS X"
      ua.device.operating_system.version.to_s.should == "10.7.1"

      ua.device.platform.name.should == "Macintosh"
      ua.device.platform.version.should be_nil

      ua.device.engine.name.should == "AppleWebKit"
      ua.device.engine.version.to_s.should == "535.5"

      ua.device.engine.browser.name.should == "Safari"
      ua.device.engine.browser.version.to_s.should == "535.5"
    end
  end

  describe "Firefox" do
    detect "Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should be_nil

      ua.device.operating_system.name.should == "Linux"
      ua.device.operating_system.version.to_s.should == "i686"

      ua.device.platform.name.should == "PC"
      ua.device.platform.version.should be_nil

      ua.device.engine.name.should == "Gecko"
      ua.device.engine.version.to_s.should == "20100101"

      # Firefox is the expected browser for this string (a "should be Chrome"
      # TODO used to sit here; it belongs to the Chrome example only).
      ua.device.engine.browser.name.should == "Firefox"
      ua.device.engine.browser.version.to_s.should == "6.0"
    end
  end

  describe "Chrome" do
    detect "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should be_nil

      ua.device.operating_system.name.should == "Windows"
      ua.device.operating_system.version.to_s.should == "5.1"

      ua.device.platform.name.should == "PC"
      ua.device.platform.version.should be_nil

      ua.device.engine.name.should == "AppleWebKit"
      ua.device.engine.version.to_s.should == "535.2"

      # TODO: should be Chrome. The assertions below pin the current result
      # (Safari), not the desired one.
      ua.device.engine.browser.name.should == "Safari"
      ua.device.engine.browser.version.to_s.should == "535.2"
    end
  end

end
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'rake'
3
+ $:.push File.expand_path("../lib", __FILE__)
4
+ require "special_agent/version"
5
+
6
# Gem specification for special_agent. The file, test-file and executable
# lists are taken from `git ls-files` output at the time the spec is evaluated.
Gem::Specification.new do |spec|
  spec.name        = "special_agent"
  spec.version     = SpecialAgent::VERSION
  spec.authors     = ["Zeke Sikelianos"]
  spec.email       = ["zeke@sikelianos.com"]
  spec.homepage    = "http://github.com/zeke/special_agent"
  spec.summary     = "Parse and process User Agents like a secret one"
  spec.description = "Parse and process User Agents like a secret one"

  # Development-only dependencies, registered in this order.
  %w[rake rspec bundler].each do |gem_name|
    spec.add_development_dependency gem_name
  end

  tracked_files = %x(git ls-files).split("\n")
  tracked_tests = %x(git ls-files -- {test,spec,features}/*).split("\n")
  tracked_bins  = %x(git ls-files -- bin/*).split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  # Executables are listed by bare file name, without the bin/ prefix.
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "special_agent/user_agent"
4
+
5
+ user_agent_strings = [
6
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; sv-se) AppleWebKit/525.26.2 (KHTML, like Gecko) Version/3.2 Safari/525.26.12",
7
+ "Mozilla/5.0 (iPhone; U; Linux i686; pt-br) AppleWebKit/532+ (KHTML, like Gecko) Version/3.0 Mobile/1A538b Safari/419.3 Midori/0.2.0",
8
+ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_1 like Mac OS X; zh-tw) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8G4 Safari/6533.18.5",
9
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
10
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-us) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
11
+ "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
12
+ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
13
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)",
14
+ "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0",
15
+ "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
16
+ "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
17
+ "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
18
+ "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)",
19
+ "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
20
+ ]
21
+
22
# Prints a report for one user-agent string: the raw string, then the string
# form of the SpecialAgent::UserAgent built from it, framed by 120-character
# "=" rules.
#
# ua_str - the raw user-agent String to parse and display.
def process_user_agent(ua_str)
  rule = '=' * 120

  puts "", rule, "Original User Agent:", " #{ua_str}", ""

  # The object is built only after the header has been printed, so anything
  # the constructor itself prints appears between the two sections.
  parsed = SpecialAgent::UserAgent.new(ua_str)
  puts "Parsed Object String:"
  puts " #{parsed}"
  puts rule
end
33
+
34
# Set the library's DEBUG / DEBUG_LEVEL constants before any parsing happens.
# NOTE(review): presumably these switch on verbose parser output - confirm in
# the library source.
SpecialAgent::DEBUG = true
SpecialAgent::DEBUG_LEVEL = 2

# Print a report for every sample string, in list order.
user_agent_strings.each { |sample| process_user_agent(sample) }
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: special_agent
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Zeke Sikelianos
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-05 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: &70202056269220 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70202056269220
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &70202056268760 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *70202056268760
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &70202056268320 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70202056268320
47
+ description: Parse and process User Agents like a secret one
48
+ email:
49
+ - zeke@sikelianos.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - README.md
57
+ - Rakefile
58
+ - lib/special_agent.rb
59
+ - lib/special_agent/base.rb
60
+ - lib/special_agent/browser.rb
61
+ - lib/special_agent/device.rb
62
+ - lib/special_agent/engine.rb
63
+ - lib/special_agent/operating_system.rb
64
+ - lib/special_agent/platform.rb
65
+ - lib/special_agent/user_agent.rb
66
+ - lib/special_agent/version.rb
67
+ - lib/tasks/allagents.xml
68
+ - lib/tasks/bot_agent_test.rake
69
+ - spec/spec_helper.rb
70
+ - spec/user_agent_matcher.rb
71
+ - spec/user_agent_spec.rb
72
+ - special_agent.gemspec
73
+ - special_agent_example
74
+ homepage: http://github.com/zeke/special_agent
75
+ licenses: []
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ required_rubygems_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 1.8.10
95
+ signing_key:
96
+ specification_version: 3
97
+ summary: Parse and process User Agents like a secret one
98
+ test_files:
99
+ - spec/spec_helper.rb
100
+ - spec/user_agent_matcher.rb
101
+ - spec/user_agent_spec.rb