voight_kampff 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/config/initializers/extend_action_dispatch_requset.rb +46 -0
- data/config/user_agents.yml +12438 -0
- data/lib/voight_kampff.rb +54 -0
- data/lib/voight_kampff/engine.rb +13 -0
- data/lib/voight_kampff/test.rb +76 -0
- data/lib/voight_kampff/user_agents_parser.rb +54 -0
- data/lib/voight_kampff/version.rb +5 -0
- metadata +72 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'voight_kampff/test'
|
2
|
+
require 'voight_kampff/user_agents_parser'
|
3
|
+
require 'voight_kampff/engine' if defined?(Rails)
|
4
|
+
|
5
|
+
# Pathname is used by VoightKampff.root. Require it explicitly so the gem
# also loads outside Rails, where nothing else may have pulled it in.
require 'pathname'

# Top-level convenience API for classifying user agent strings.
#
# Each predicate builds a VoightKampff::Test for the given string and
# forwards the same-named predicate to it, returning that result untouched.
module VoightKampff
  class << self
    # Root directory of the gem: the parent of the directory holding this file.
    #
    # @return [Pathname]
    def root
      Pathname.new(File.expand_path('..', File.dirname(__FILE__)))
    end

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#human?
    def human?(user_agent_string)
      test(user_agent_string).human?
    end

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#bot?
    def bot?(user_agent_string)
      test(user_agent_string).bot?
    end
    alias :replicant? :bot?

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#browser?
    def browser?(user_agent_string)
      test(user_agent_string).browser?
    end

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#checker?
    def checker?(user_agent_string)
      test(user_agent_string).checker?
    end

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#downloader?
    def downloader?(user_agent_string)
      test(user_agent_string).downloader?
    end

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#proxy?
    def proxy?(user_agent_string)
      test(user_agent_string).proxy?
    end

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#crawler?
    def crawler?(user_agent_string)
      test(user_agent_string).crawler?
    end

    # @param user_agent_string [String] raw User-Agent value to classify
    # @return the result of VoightKampff::Test#spam?
    def spam?(user_agent_string)
      test(user_agent_string).spam?
    end

    private

    # Builds the per-request classifier that the public predicates delegate to.
    def test(user_agent_string)
      VoightKampff::Test.new(user_agent_string)
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# The agent list is stored as YAML. Require the parser explicitly instead of
# relying on the host application having loaded it already.
require 'yaml'

module VoightKampff
  # Classifies one user agent string against the list of known agents read
  # from config/user_agents.yml.
  #
  # Each list entry is a hash with the keys 'string', 'string_match'
  # ('regex' or anything else for a case-insensitive exact match), 'types'
  # and 'description'.
  class Test
    attr_accessor :user_agent_string
    attr_accessor :types
    # Description of the matched agent entry, or nil when nothing matched.
    attr_reader :description

    # @param user_agent_string [String, nil] raw User-Agent value; nil is
    #   treated as an unknown agent rather than raising
    def initialize(user_agent_string)
      load_agents
      @user_agent_string = user_agent_string
      @types = Array(agent['types']).map(&:to_sym)
      @description = agent['description']
    end

    # The first list entry matching the user agent string, memoized.
    #
    # @return [Hash] the matching entry, or an empty hash when none matches
    def agent
      @agent ||= find_agent || {}
    end

    # Whether the matched agent carries at least one of the given types.
    #
    # @param types [Array<Symbol>] type names to look for
    # @return [Boolean, nil] nil when the user agent is not in the list at
    #   all, so callers can tell "unknown" apart from "known, but not this type"
    def has_type?(*types)
      return nil if agent.empty?

      types.any? { |type| @types.include?(type) }
    end

    # True when the agent is a browser, downloader or proxy.
    def human?
      has_type? :browser, :downloader, :proxy
    end

    # True when the agent is a checker, crawler or spam agent.
    def bot?
      has_type? :checker, :crawler, :spam
    end
    alias :replicant? :bot?

    def browser?
      has_type? :browser
    end

    def checker?
      has_type? :checker
    end

    def downloader?
      has_type? :downloader
    end

    def proxy?
      has_type? :proxy
    end

    def crawler?
      has_type? :crawler
    end

    def spam?
      has_type? :spam
    end

    private

    # Scans the agent list for the first entry matching the user agent string.
    def find_agent
      # to_s lets a missing (nil) User-Agent fall through as "no match".
      subject = @user_agent_string.to_s

      @@agents.find do |candidate|
        if candidate['string_match'] == 'regex'
          subject =~ Regexp.new(candidate['string'], Regexp::IGNORECASE)
        else
          subject.casecmp(candidate['string']) == 0
        end
      end
    end

    # Loads the agent list once per process. The class variable is kept so the
    # parsed list is shared by every instance; the path lookup and file read
    # only happen on the first call.
    def load_agents
      @@agents ||= YAML.load_file(agents_path.to_s)
    end

    # Path of the agent list: the host application's config/user_agents.yml
    # when running under Rails and that file exists, otherwise the copy
    # bundled with the gem.
    def agents_path
      rel_path = ['config', 'user_agents.yml']

      # Rails is optional for this gem, so only consult it when it is loaded.
      if defined?(Rails) && Rails.respond_to?(:root) && Rails.root
        app_path = Rails.root.join(*rel_path)
        return app_path if File.exist?(app_path)
      end

      VoightKampff.root.join(*rel_path)
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'nokogiri'
require 'httpclient'

module VoightKampff
  # Downloads an XML list of user agents and converts it into the
  # array-of-hashes structure stored in user_agents.yml.
  #
  # To generate a new user_agents.yml file run rake voight_kampff:import_user_agents
  # This class is designed to work with http://www.user-agents.org/allagents.xml
  class UserAgentsParser
    # Fetches the XML document at +url+ right away and keeps its raw text.
    #
    # @param url [String] location of the XML list
    def initialize(url)
      @url = url
      @xml = HTTPClient.new.get_content(url)
    end

    # Parses the downloaded XML and returns one hash per <user-agent> node.
    # The document is re-parsed on every call.
    #
    # @return [Array<Hash>] entries with the keys 'string', 'string_match',
    #   'types' and 'description'
    def agents
      document = Nokogiri::XML(@xml)

      document.xpath('//user-agent').map do |node|
        type_codes = node.xpath('Type').text.split

        {
          'string' => node.xpath('String').text,
          'string_match' => 'exact', # exact or regex
          'types' => type_codes.map { |code| clarify_type(code) }.compact,
          'description' => node.xpath('Description').text
        }
      end
    end

    # @return [Array<Hash>] same as #agents
    def to_array
      agents
    end
    alias :to_a :to_array

    # @return [String] the agent list serialized as YAML
    def to_yaml
      agents.to_yaml
    end

    private

    # Expands a one-letter type code (case-insensitive) to its full type
    # name; returns nil for any other code.
    def clarify_type(type)
      case type.downcase
      when 'b' then 'browser'
      when 'c' then 'checker'
      when 'd' then 'downloader'
      when 'p' then 'proxy'
      when 'r' then 'crawler'
      when 's' then 'spam'
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: voight_kampff
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Adam Crownoble
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-05-11 00:00:00 Z
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: Voight-Kampff is a Ruby gem that detects bots, spiders, crawlers and replicants
|
22
|
+
email: adam@obledesign.com
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- config/user_agents.yml
|
31
|
+
- config/initializers/extend_action_dispatch_requset.rb
|
32
|
+
- lib/voight_kampff.rb
|
33
|
+
- lib/voight_kampff/user_agents_parser.rb
|
34
|
+
- lib/voight_kampff/version.rb
|
35
|
+
- lib/voight_kampff/test.rb
|
36
|
+
- lib/voight_kampff/engine.rb
|
37
|
+
homepage: https://github.com/adamcrown/Voight-Kampff
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
hash: 3
|
51
|
+
segments:
|
52
|
+
- 0
|
53
|
+
version: "0"
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
hash: 3
|
60
|
+
segments:
|
61
|
+
- 0
|
62
|
+
version: "0"
|
63
|
+
requirements: []
|
64
|
+
|
65
|
+
rubyforge_project:
|
66
|
+
rubygems_version: 1.7.2
|
67
|
+
signing_key:
|
68
|
+
specification_version: 3
|
69
|
+
summary: Voight-Kampff bot detection
|
70
|
+
test_files: []
|
71
|
+
|
72
|
+
has_rdoc:
|