special_agent 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/README.md +193 -0
- data/Rakefile +14 -0
- data/lib/special_agent.rb +8 -0
- data/lib/special_agent/base.rb +58 -0
- data/lib/special_agent/browser.rb +61 -0
- data/lib/special_agent/device.rb +97 -0
- data/lib/special_agent/engine.rb +65 -0
- data/lib/special_agent/operating_system.rb +69 -0
- data/lib/special_agent/platform.rb +75 -0
- data/lib/special_agent/user_agent.rb +82 -0
- data/lib/special_agent/version.rb +37 -0
- data/lib/tasks/allagents.xml +22170 -0
- data/lib/tasks/bot_agent_test.rake +86 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/user_agent_matcher.rb +16 -0
- data/spec/user_agent_spec.rb +85 -0
- data/special_agent.gemspec +23 -0
- data/special_agent_example +39 -0
- metadata +101 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
require "special_agent"
|
2
|
+
require 'rexml/document'
|
3
|
+
|
4
|
+
# Rake task that measures how well SpecialAgent tells bots from browsers.
#
# It combines a few hand-picked user-agent strings with the entries of the
# bundled allagents.xml list, runs every string through
# SpecialAgent::UserAgent#is_bot?, and prints two tallies:
#
# * BOT DETECTION         - strings labelled as bots that were detected as bots
# * FALSE POSITIVES CHECK - strings labelled as browsers that were not flagged
#
# The "BENCHMARK" figures in the output are fixed reference numbers printed
# next to the live counts for manual comparison; nothing is asserted.
task :test_bots do
  # Hand-picked strings that are expected to be classified as bots.
  bot_user_agent_strings = [
    "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)",
    "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
  ]
  # Hand-picked strings that are expected NOT to be classified as bots.
  browser_user_agent_strings = [
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; sv-se) AppleWebKit/525.26.2 (KHTML, like Gecko) Version/3.2 Safari/525.26.12",
    "Mozilla/5.0 (iPhone; U; Linux i686; pt-br) AppleWebKit/532+ (KHTML, like Gecko) Version/3.0 Mobile/1A538b Safari/419.3 Midori/0.2.0",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_1 like Mac OS X; zh-tw) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8G4 Safari/6533.18.5",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-us) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0",
    "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11"
  ]

  puts
  puts "parsing the user-agents.org list..."

  # Resolve the data file relative to this rake file (both live in lib/tasks)
  # so the task no longer depends on the current working directory.
  xml_path = File.expand_path("../allagents.xml", __FILE__)
  # The block form closes the file handle as soon as the document is built.
  doc = File.open(xml_path) { |file| REXML::Document.new(file) }

  doc.elements.each("user-agents/user-agent") do |ua|
    types = []
    ua.elements.each("Type") { |item| types << item.text }
    strings = []
    ua.elements.each("String") { |item| strings << item.text }

    # An entry counts as a bot when any <Type> is exactly "R" or "S", and as a
    # browser when any <Type> is exactly "B"; an entry may land in both lists.
    bot_user_agent_strings.concat(strings) if types.include?("R") || types.include?("S")
    browser_user_agent_strings.concat(strings) if types.include?("B")
  end

  puts "testing..."

  # Bots: success means is_bot? is truthy. Print `missed` to inspect failures.
  detected, missed = bot_user_agent_strings.partition do |ua_str|
    SpecialAgent::UserAgent.new(ua_str).is_bot?
  end

  puts "BOT DETECTION"
  puts "#{detected.size} succeed. BENCHMARK: 376"
  puts "#{missed.size} fail. BENCHMARK: 1083"

  # Browsers: success means is_bot? is falsy. `flagged` holds the false positives.
  flagged, passed = browser_user_agent_strings.partition do |ua_str|
    SpecialAgent::UserAgent.new(ua_str).is_bot?
  end

  puts "FALSE POSITIVES CHECK"
  puts "#{passed.size} succeed. BENCHMARK: 326"
  puts "#{flagged.size} fail. BENCHMARK: 4"
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
# Activate the bundle's default and development groups; if Bundler reports a
# problem, print it with a hint and exit using Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message, "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

spec_dir = File.dirname(__FILE__)

# Load the matcher helper that sits next to this file.
require File.join(spec_dir, "user_agent_matcher")

# Prepend lib/ first and the spec directory second, so the spec directory ends
# up at the very front of the load path, then load the library under test.
[File.join(spec_dir, '..', 'lib'), spec_dir].each { |dir| $LOAD_PATH.unshift(dir) }
require 'special_agent'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Expectations for SpecialAgent::UserAgent parsing of desktop browser strings.
#
# Each `detect` call takes a raw user-agent string and a block that receives
# the parsed object as `ua`. `detect` is not defined in this file; presumably
# it comes from spec/user_agent_matcher.rb - confirm there.
describe SpecialAgent::UserAgent do

  describe "Safari" do
    detect "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should == nil

      ua.device.operating_system.name.should == "Mac OS X"
      ua.device.operating_system.version.to_s.should == "10.6.8"

      ua.device.platform.name.should == "Macintosh"
      ua.device.platform.version.should == nil

      ua.device.engine.name.should == "AppleWebKit"
      ua.device.engine.version.to_s.should == "533.21.1"

      ua.device.engine.browser.name.should == "Safari"
      ua.device.engine.browser.version.to_s.should == "533.21.1"
    end

    detect "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) AppleWebKit/535.5 (KHTML, like Gecko) Chrome/16.0.891.1 Safari/535.5" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should == nil

      ua.device.operating_system.name.should == "Mac OS X"
      ua.device.operating_system.version.to_s.should == "10.7.1"

      ua.device.platform.name.should == "Macintosh"
      ua.device.platform.version.should == nil

      ua.device.engine.name.should == "AppleWebKit"
      ua.device.engine.version.to_s.should == "535.5"

      # TODO: this string carries a Chrome/16.0.891.1 token, so the browser
      # should be Chrome; the expectations below still name Safari.
      ua.device.engine.browser.name.should == "Safari"
      ua.device.engine.browser.version.to_s.should == "535.5"
    end
  end

  describe "Firefox" do
    detect "Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should == nil

      ua.device.operating_system.name.should == "Linux"
      ua.device.operating_system.version.to_s.should == "i686"

      ua.device.platform.name.should == "PC"
      ua.device.platform.version.should == nil

      ua.device.engine.name.should == "Gecko"
      ua.device.engine.version.to_s.should == "20100101"

      ua.device.engine.browser.name.should == "Firefox"
      ua.device.engine.browser.version.to_s.should == "6.0"
    end
  end

  describe "Chrome" do
    detect "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2" do |ua|
      ua.device.type.should == :computer
      ua.device.name.should == "Computer"
      ua.device.version.should == nil

      ua.device.operating_system.name.should == "Windows"
      ua.device.operating_system.version.to_s.should == "5.1"

      ua.device.platform.name.should == "PC"
      ua.device.platform.version.should == nil

      ua.device.engine.name.should == "AppleWebKit"
      ua.device.engine.version.to_s.should == "535.2"

      # TODO: should be Chrome (the string carries a Chrome/15.0.872.0 token);
      # the expectations below still name Safari.
      ua.device.engine.browser.name.should == "Safari"
      ua.device.engine.browser.version.to_s.should == "535.2"
    end
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'rake'
|
3
|
+
$:.push File.expand_path("../lib", __FILE__)
|
4
|
+
require "special_agent/version"
|
5
|
+
|
6
|
+
# Gem specification for special_agent. The version comes from
# SpecialAgent::VERSION, and the file lists are taken from what git tracks.
Gem::Specification.new do |spec|
  # Identity and contact details.
  spec.name = "special_agent"
  spec.version = SpecialAgent::VERSION
  spec.authors = ["Zeke Sikelianos"]
  spec.email = ["zeke@sikelianos.com"]
  spec.homepage = "http://github.com/zeke/special_agent"
  spec.summary = "Parse and process User Agents like a secret one"
  spec.description = "Parse and process User Agents like a secret one"

  # Development-only dependencies, declared in this order.
  %w[rake rspec bundler].each do |gem_name|
    spec.add_development_dependency gem_name
  end

  # Package contents: everything tracked by git, with test files and bin/
  # executables picked out by path.
  spec.files = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "special_agent/user_agent"
|
4
|
+
|
5
|
+
# Sample user-agent strings fed to the demo below, in the order they are printed.
user_agent_strings = [
  # Safari / WebKit-based
  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; sv-se) AppleWebKit/525.26.2 (KHTML, like Gecko) Version/3.2 Safari/525.26.12",
  "Mozilla/5.0 (iPhone; U; Linux i686; pt-br) AppleWebKit/532+ (KHTML, like Gecko) Version/3.0 Mobile/1A538b Safari/419.3 Midori/0.2.0",
  "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_1 like Mac OS X; zh-tw) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8G4 Safari/6533.18.5",
  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-us) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
  "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",

  # Internet Explorer
  "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)",

  # Firefox
  "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0",
  "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",

  # Opera
  "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
  "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",

  # Crawlers
  "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)",
  "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
]
|
21
|
+
|
22
|
+
# Prints a report for one user-agent string: the raw string, then the string
# form of the SpecialAgent::UserAgent built from it, framed by 120-character
# rules of "=".
#
# The parser is constructed after the first section has been printed, so
# anything it writes while parsing appears between the two sections.
#
# ua_str - the raw user-agent string to parse and display.
def process_user_agent(ua_str)
  rule = '=' * 120

  puts "", rule, "Original User Agent:", " #{ua_str}", ""

  parsed = SpecialAgent::UserAgent.new(ua_str)

  puts "Parsed Object String:", " #{parsed}", rule
end
|
33
|
+
|
34
|
+
# Set the library's debug constants before running the samples.
# NOTE(review): presumably these switch on verbose parser output - confirm in lib/.
SpecialAgent::DEBUG = true
SpecialAgent::DEBUG_LEVEL = 2

# Print a report for every sample string, in list order.
user_agent_strings.each { |sample| process_user_agent(sample) }
|
metadata
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: special_agent
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.3
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Zeke Sikelianos
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-12-05 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: &70202056269220 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70202056269220
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &70202056268760 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70202056268760
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: bundler
|
38
|
+
requirement: &70202056268320 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70202056268320
|
47
|
+
description: Parse and process User Agents like a secret one
|
48
|
+
email:
|
49
|
+
- zeke@sikelianos.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- Gemfile
|
56
|
+
- README.md
|
57
|
+
- Rakefile
|
58
|
+
- lib/special_agent.rb
|
59
|
+
- lib/special_agent/base.rb
|
60
|
+
- lib/special_agent/browser.rb
|
61
|
+
- lib/special_agent/device.rb
|
62
|
+
- lib/special_agent/engine.rb
|
63
|
+
- lib/special_agent/operating_system.rb
|
64
|
+
- lib/special_agent/platform.rb
|
65
|
+
- lib/special_agent/user_agent.rb
|
66
|
+
- lib/special_agent/version.rb
|
67
|
+
- lib/tasks/allagents.xml
|
68
|
+
- lib/tasks/bot_agent_test.rake
|
69
|
+
- spec/spec_helper.rb
|
70
|
+
- spec/user_agent_matcher.rb
|
71
|
+
- spec/user_agent_spec.rb
|
72
|
+
- special_agent.gemspec
|
73
|
+
- special_agent_example
|
74
|
+
homepage: http://github.com/zeke/special_agent
|
75
|
+
licenses: []
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options: []
|
78
|
+
require_paths:
|
79
|
+
- lib
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ! '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
requirements: []
|
93
|
+
rubyforge_project:
|
94
|
+
rubygems_version: 1.8.10
|
95
|
+
signing_key:
|
96
|
+
specification_version: 3
|
97
|
+
summary: Parse and process User Agents like a secret one
|
98
|
+
test_files:
|
99
|
+
- spec/spec_helper.rb
|
100
|
+
- spec/user_agent_matcher.rb
|
101
|
+
- spec/user_agent_spec.rb
|