voight_kampff 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/config/initializers/extend_action_dispatch_requset.rb +46 -0
- data/config/user_agents.yml +12438 -0
- data/lib/voight_kampff.rb +54 -0
- data/lib/voight_kampff/engine.rb +13 -0
- data/lib/voight_kampff/test.rb +76 -0
- data/lib/voight_kampff/user_agents_parser.rb +54 -0
- data/lib/voight_kampff/version.rb +5 -0
- metadata +72 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'voight_kampff/test'
|
2
|
+
require 'voight_kampff/user_agents_parser'
|
3
|
+
require 'voight_kampff/engine' if defined?(Rails)
|
4
|
+
|
5
|
+
# Pathname is used by VoightKampff.root; load it explicitly instead of relying
# on the host application (e.g. Rails) having already required it.
require 'pathname'

# Entry point for classifying HTTP user agent strings.
#
# Every predicate below builds a VoightKampff::Test for the given string and
# forwards the question to it, so each return value is whatever the
# VoightKampff::Test predicate of the same name returns.
module VoightKampff

  class << self

    # Directory one level above the directory containing this file.
    #
    # @return [Pathname]
    def root
      Pathname.new(File.expand_path('..', File.dirname(__FILE__)))
    end

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#human?
    def human?(user_agent_string)
      test(user_agent_string).human?
    end

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#bot?
    def bot?(user_agent_string)
      test(user_agent_string).bot?
    end
    alias :replicant? :bot?

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#browser?
    def browser?(user_agent_string)
      test(user_agent_string).browser?
    end

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#checker?
    def checker?(user_agent_string)
      test(user_agent_string).checker?
    end

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#downloader?
    def downloader?(user_agent_string)
      test(user_agent_string).downloader?
    end

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#proxy?
    def proxy?(user_agent_string)
      test(user_agent_string).proxy?
    end

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#crawler?
    def crawler?(user_agent_string)
      test(user_agent_string).crawler?
    end

    # @param user_agent_string [String] user agent to classify
    # @return result of VoightKampff::Test#spam?
    def spam?(user_agent_string)
      test(user_agent_string).spam?
    end

    private

    # Builds the object that performs the actual lookup. Kept private so the
    # module's public surface is only the predicates and +root+.
    def test(user_agent_string)
      VoightKampff::Test.new(user_agent_string)
    end

  end

end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# YAML is needed by load_agents; require it here rather than relying on the
# host application having loaded it.
require 'yaml'

module VoightKampff

  # Classifies a single user agent string against the list of known agents
  # stored in config/user_agents.yml.
  #
  # The agent list is loaded once and cached in a class variable. The lookup
  # for an instance is memoized, and +types+/+description+ are computed in the
  # constructor, so assigning a new +user_agent_string+ afterwards does not
  # re-run the match.
  class Test

    attr_accessor :user_agent_string
    attr_accessor :types
    # Value of the matched entry's 'description' key (nil when nothing matched).
    attr_reader :description

    # @param user_agent_string [String, nil] user agent to classify; nil is
    #   treated as "no known agent"
    def initialize(user_agent_string)
      load_agents
      @user_agent_string = user_agent_string
      @types = Array(agent['types']).map(&:to_sym)
      @description = agent['description']
    end

    # The first entry of the agent list that matches the user agent string,
    # or an empty Hash when nothing matches.
    #
    # @return [Hash]
    def agent
      @agent ||= find_agent || {}
    end

    # @param types [Array<Symbol>] type names to look for
    # @return [true, false, nil] nil when no agent matched (unknown), otherwise
    #   whether the matched agent has at least one of the given types
    def has_type?(*types)
      return nil if agent.empty?

      types.any? { |type| @types.include?(type) }
    end

    def human?
      has_type? :browser, :downloader, :proxy
    end

    def bot?
      has_type? :checker, :crawler, :spam
    end
    alias :replicant? :bot?

    def browser?
      has_type? :browser
    end

    def checker?
      has_type? :checker
    end

    def downloader?
      has_type? :downloader
    end

    def proxy?
      has_type? :proxy
    end

    def crawler?
      has_type? :crawler
    end

    def spam?
      has_type? :spam
    end

    private

    # Scans the cached agent list for the first entry matching the user agent
    # string. Entries whose 'string_match' is 'regex' are compiled as
    # case-insensitive regular expressions; every other entry is compared
    # case-insensitively for equality.
    #
    # @return [Hash, nil] the matching entry, or nil when there is none
    def find_agent
      # A missing user agent cannot match anything; bail out before calling
      # String methods on nil.
      return nil if @user_agent_string.nil?

      @@agents.find do |candidate|
        if candidate['string_match'] == 'regex'
          @user_agent_string =~ Regexp.new(candidate['string'], Regexp::IGNORECASE)
        else
          @user_agent_string.casecmp(candidate['string']) == 0
        end
      end
    end

    # Location of the agent list: config/user_agents.yml under Rails.root when
    # Rails is loaded and that file exists, otherwise the copy under
    # VoightKampff.root.
    #
    # @return [Pathname]
    def agents_path
      rel_path = ['config', 'user_agents.yml']

      if defined?(Rails) && Rails.respond_to?(:root) && Rails.root
        app_path = Rails.root.join(*rel_path)
        return app_path if File.exist?(app_path)
      end

      VoightKampff.root.join(*rel_path)
    end

    # Loads the agent list on first use and caches it for every instance.
    # The path is only resolved when the cache is empty, and load_file closes
    # the file once it has been read.
    def load_agents
      @@agents ||= YAML.load_file(agents_path)
    end

  end

end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module VoightKampff

  # Downloads an XML list of user agents and converts it into the array of
  # hashes stored in user_agents.yml.
  #
  # To generate a new user_agents.yml file run rake voight_kampff:import_user_agents
  # This class is designed to work with http://www.user-agents.org/allagents.xml
  class UserAgentsParser

    require 'yaml' # to_yaml below needs it; do not rely on the host app
    require 'nokogiri'
    require 'httpclient'

    # Single-letter type codes found in the XML, mapped to the type names
    # written to the YAML file. Keyed by String so that text from the
    # downloaded document is never converted to a Symbol.
    TYPE_NAMES = {
      'b' => 'browser',
      'c' => 'checker',
      'd' => 'downloader',
      'p' => 'proxy',
      'r' => 'crawler',
      's' => 'spam'
    }.freeze

    # Fetches the XML document immediately.
    #
    # @param url [String] location of the XML user agent list
    def initialize(url)
      @url = url
      @xml = HTTPClient.new.get_content(url)
    end

    # Parses the downloaded XML (on every call) into one Hash per
    # <user-agent> element.
    #
    # @return [Array<Hash>] hashes with the keys 'string', 'string_match',
    #   'types' and 'description'
    def agents
      Nokogiri::XML(@xml).xpath('//user-agent').map do |node|
        {
          'string' => node.xpath('String').text,
          'string_match' => 'exact', # exact or regex
          # Type text is a whitespace-separated list of codes; unknown codes
          # map to nil and are dropped by compact.
          'types' => node.xpath('Type').text.split.map { |code| clarify_type(code) }.compact,
          'description' => node.xpath('Description').text
        }
      end
    end

    # @return [Array<Hash>] same as #agents
    def to_array
      agents
    end
    alias :to_a :to_array

    # @return [String] the agent list serialized as YAML
    def to_yaml
      agents.to_yaml
    end

    private

    # Translates a type code (case-insensitive) into its type name.
    #
    # @param type [String] type code such as "B" or "r"
    # @return [String, nil] the type name, or nil for an unrecognized code
    def clarify_type(type)
      TYPE_NAMES[type.downcase]
    end

  end

end
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: voight_kampff
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Adam Crownoble
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-05-11 00:00:00 Z
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: Voight-Kampff is a Ruby gem that detects bots, spiders, crawlers and replicants
|
22
|
+
email: adam@obledesign.com
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- config/user_agents.yml
|
31
|
+
- config/initializers/extend_action_dispatch_requset.rb
|
32
|
+
- lib/voight_kampff.rb
|
33
|
+
- lib/voight_kampff/user_agents_parser.rb
|
34
|
+
- lib/voight_kampff/version.rb
|
35
|
+
- lib/voight_kampff/test.rb
|
36
|
+
- lib/voight_kampff/engine.rb
|
37
|
+
homepage: https://github.com/adamcrown/Voight-Kampff
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
hash: 3
|
51
|
+
segments:
|
52
|
+
- 0
|
53
|
+
version: "0"
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
hash: 3
|
60
|
+
segments:
|
61
|
+
- 0
|
62
|
+
version: "0"
|
63
|
+
requirements: []
|
64
|
+
|
65
|
+
rubyforge_project:
|
66
|
+
rubygems_version: 1.7.2
|
67
|
+
signing_key:
|
68
|
+
specification_version: 3
|
69
|
+
summary: Voight-Kampff bot detection
|
70
|
+
test_files: []
|
71
|
+
|
72
|
+
has_rdoc:
|