voight_kampff 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,54 @@
1
+ require 'voight_kampff/test'
2
+ require 'voight_kampff/user_agents_parser'
3
+ require 'voight_kampff/engine' if defined?(Rails)
4
+
5
module VoightKampff

  # Pathname is part of the standard library but is not loaded by default.
  # `root` needs it even when this gem is used without Rails.
  require 'pathname'

  class << self

    # Returns, as a Pathname, the parent of the directory that contains
    # this file.
    def root
      Pathname.new(File.expand_path('..', File.dirname(__FILE__)))
    end

    # Each predicate below wraps the given user agent string in a
    # VoightKampff::Test and returns whatever the predicate of the same name
    # on that object returns.

    def human?(user_agent_string)
      test(user_agent_string).human?
    end

    def bot?(user_agent_string)
      test(user_agent_string).bot?
    end
    alias :replicant? :bot?

    def browser?(user_agent_string)
      test(user_agent_string).browser?
    end

    def checker?(user_agent_string)
      test(user_agent_string).checker?
    end

    def downloader?(user_agent_string)
      test(user_agent_string).downloader?
    end

    def proxy?(user_agent_string)
      test(user_agent_string).proxy?
    end

    def crawler?(user_agent_string)
      test(user_agent_string).crawler?
    end

    def spam?(user_agent_string)
      test(user_agent_string).spam?
    end

    private

    # Builds a fresh VoightKampff::Test for the given user agent string.
    def test(user_agent_string)
      VoightKampff::Test.new(user_agent_string)
    end

  end

end
@@ -0,0 +1,13 @@
1
+ require 'voight_kampff'
2
+ require 'rails'
3
+ require 'active_record'
4
+
5
module VoightKampff
  # Rails::Engine subclass for this gem.
  class Engine < Rails::Engine
    # Hands Rails a block that loads the gem's rake file.
    rake_tasks { load 'tasks/voight_kampff.rake' }
  end
end
@@ -0,0 +1,76 @@
1
module VoightKampff

  # Classifies one user agent string against the list of known agents read
  # from config/user_agents.yml.
  class Test

    # load_agents uses YAML and nothing else in this file loads it.
    require 'yaml'

    attr_accessor :user_agent_string
    attr_accessor :types
    attr_reader :description

    # user_agent_string - the raw User-Agent value to classify; nil is
    #                     treated like an empty string when matching.
    def initialize(user_agent_string)
      load_agents
      @user_agent_string = user_agent_string
      @types = Array(agent['types']).map(&:to_sym)
      @description = agent['description']
    end

    # Returns the first known agent entry (a Hash) that matches the user
    # agent string, or an empty Hash when none does. The result is memoized,
    # so it is not recomputed if user_agent_string is reassigned later.
    def agent
      @agent ||= (@@agents.find { |candidate| matches?(candidate) } || {})
    end

    # Returns nil when no known agent matched; otherwise true if the matched
    # agent has at least one of the given types (Symbols), false if not.
    def has_type?(*types)
      return nil if agent.empty?
      types.any? { |type| @types.include?(type) }
    end

    def human?
      has_type? :browser, :downloader, :proxy
    end

    def bot?
      has_type? :checker, :crawler, :spam
    end
    alias :replicant? :bot?

    def browser?
      has_type? :browser
    end

    def checker?
      has_type? :checker
    end

    def downloader?
      has_type? :downloader
    end

    def proxy?
      has_type? :proxy
    end

    def crawler?
      has_type? :crawler
    end

    def spam?
      has_type? :spam
    end

    private

    # True when the entry matches the user agent string. Entries whose
    # 'string_match' is 'regex' are applied as case-insensitive patterns;
    # every other entry must equal the string ignoring case.
    def matches?(candidate)
      # Requests may carry no User-Agent at all; nil has no casecmp.
      subject = @user_agent_string.to_s
      if candidate['string_match'] == 'regex'
        !(subject =~ Regexp.new(candidate['string'], Regexp::IGNORECASE)).nil?
      else
        subject.casecmp(candidate['string']) == 0
      end
    end

    # Reads the agent list once and caches it in @@agents. The right-hand
    # side only runs while the cache is unset, so the file lookup is skipped
    # on later instantiations. An empty YAML document yields an empty list.
    def load_agents
      @@agents ||= (YAML.load_file(agents_path.to_s) || [])
    end

    # Chooses the application's config/user_agents.yml when Rails is loaded
    # and that file exists; otherwise uses the copy under VoightKampff.root.
    def agents_path
      rel_path = ['config', 'user_agents.yml']
      if defined?(Rails) && Rails.respond_to?(:root) && Rails.root
        app_path = Rails.root.join(*rel_path)
        return app_path if File.exist?(app_path.to_s)
      end
      VoightKampff.root.join(*rel_path)
    end

  end

end
@@ -0,0 +1,54 @@
1
module VoightKampff

  # Turns an XML list of user agents into plain Ruby hashes.
  #
  # To generate a new user_agents.yml file run rake voight_kampff:import_user_agents
  # This class is designed to work with http://www.user-agents.org/allagents.xml
  class UserAgentsParser

    require 'nokogiri'
    require 'httpclient'

    # Downloads the document at +url+ right away and keeps its body.
    def initialize(url)
      @url = url
      @xml = HTTPClient.new.get_content(url)
    end

    # Parses the downloaded XML and returns one Hash per <user-agent>
    # element, with the keys 'string', 'string_match', 'types' and
    # 'description'. The XML is parsed again on every call.
    def agents
      document = Nokogiri::XML(@xml)
      document.xpath('//user-agent').map do |node|
        string = node.xpath('String').text
        # Unrecognised type codes come back as nil and are dropped.
        types = node.xpath('Type').text.split.map { |code| clarify_type(code) }.compact
        description = node.xpath('Description').text

        {
          'string' => string,
          'string_match' => 'exact', # exact or regex
          'types' => types,
          'description' => description
        }
      end
    end

    # Same list as #agents.
    def to_array
      agents
    end
    alias_method :to_a, :to_array

    # The #agents list serialised as YAML.
    def to_yaml
      agents.to_yaml
    end

    private

    # Maps a one-letter type code (any case) to its full name.
    # Returns nil for codes that are not listed.
    def clarify_type(type)
      case type.downcase
      when 'b' then 'browser'
      when 'c' then 'checker'
      when 'd' then 'downloader'
      when 'p' then 'proxy'
      when 'r' then 'crawler'
      when 's' then 'spam'
      end
    end

  end

end
@@ -0,0 +1,5 @@
1
module VoightKampff
  # Version string for this gem.
  VERSION = '0.1.0'
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: voight_kampff
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Adam Crownoble
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-05-11 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: Voight-Kampff is a Ruby gem that detects bots, spiders, crawlers and replicants
22
+ email: adam@obledesign.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - config/user_agents.yml
31
+ - config/initializers/extend_action_dispatch_requset.rb
32
+ - lib/voight_kampff.rb
33
+ - lib/voight_kampff/user_agents_parser.rb
34
+ - lib/voight_kampff/version.rb
35
+ - lib/voight_kampff/test.rb
36
+ - lib/voight_kampff/engine.rb
37
+ homepage: https://github.com/adamcrown/Voight-Kampff
38
+ licenses: []
39
+
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ hash: 3
51
+ segments:
52
+ - 0
53
+ version: "0"
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ hash: 3
60
+ segments:
61
+ - 0
62
+ version: "0"
63
+ requirements: []
64
+
65
+ rubyforge_project:
66
+ rubygems_version: 1.7.2
67
+ signing_key:
68
+ specification_version: 3
69
+ summary: Voight-Kampff bot detection
70
+ test_files: []
71
+
72
+ has_rdoc: