voight_kampff 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ require 'voight_kampff/test'
2
+ require 'voight_kampff/user_agents_parser'
3
+ require 'voight_kampff/engine' if defined?(Rails)
4
+
5
require 'pathname' # `root` builds a Pathname; without this it raises NameError outside Rails

# Top-level namespace of the gem. The module-level predicates are shortcuts
# that build a VoightKampff::Test for the given User-Agent string and forward
# the question to it.
#
# Every predicate returns true or false for a recognised user agent and nil
# when the user agent is not present in the agent list.
module VoightKampff
  class << self
    # Root directory of the gem (the parent of the directory holding this file).
    #
    # @return [Pathname]
    def root
      Pathname.new(File.expand_path('..', File.dirname(__FILE__)))
    end

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as browser, downloader or proxy
    def human?(user_agent_string)
      test(user_agent_string).human?
    end

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as checker, crawler or spam
    def bot?(user_agent_string)
      test(user_agent_string).bot?
    end
    alias replicant? bot?

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as a browser
    def browser?(user_agent_string)
      test(user_agent_string).browser?
    end

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as a checker
    def checker?(user_agent_string)
      test(user_agent_string).checker?
    end

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as a downloader
    def downloader?(user_agent_string)
      test(user_agent_string).downloader?
    end

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as a proxy
    def proxy?(user_agent_string)
      test(user_agent_string).proxy?
    end

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as a crawler
    def crawler?(user_agent_string)
      test(user_agent_string).crawler?
    end

    # @param user_agent_string [String] raw User-Agent header value
    # @return [Boolean, nil] whether the agent is typed as spam
    def spam?(user_agent_string)
      test(user_agent_string).spam?
    end

    private

    # Builds the object that performs the lookup. Note that this shadows the
    # private Kernel#test on the module's singleton.
    def test(user_agent_string)
      VoightKampff::Test.new(user_agent_string)
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'voight_kampff'
2
+ require 'rails'
3
+ require 'active_record'
4
+
5
module VoightKampff
  # Rails engine for the gem. Its only job here is to hand Rails a block, via
  # the `rake_tasks` hook, that loads the gem's rake task file.
  #
  # NOTE(review): the gemspec file list for this release does not appear to
  # include a tasks/voight_kampff.rake file -- confirm the task file is
  # actually shipped, otherwise this `load` will fail.
  class Engine < Rails::Engine
    rake_tasks { load 'tasks/voight_kampff.rake' }
  end
end
@@ -0,0 +1,76 @@
1
require 'yaml' # the agent list is a YAML file; nothing else in this file loads the library

module VoightKampff
  # Looks a single User-Agent string up in the list of known agents and
  # answers questions about the types recorded for it.
  #
  # The agent list is an array of hashes with the keys 'string',
  # 'string_match' ('exact' or 'regex'), 'types' and 'description'. It is
  # loaded once and shared by every instance through the @@agents class
  # variable.
  class Test
    attr_accessor :user_agent_string
    attr_accessor :types
    # Description recorded for the matched agent, or nil when nothing matched.
    attr_reader :description

    # @param user_agent_string [String, nil] raw User-Agent header value; nil
    #   is treated like an empty string when matching
    def initialize(user_agent_string)
      load_agents
      @user_agent_string = user_agent_string
      @types = Array(agent['types']).map(&:to_sym)
      @description = agent['description']
    end

    # The first entry of the agent list that matches the user agent string,
    # compared case-insensitively. The result is memoized, so it is not
    # recomputed if `user_agent_string` is reassigned afterwards.
    #
    # @return [Hash] the matching entry, or an empty hash when nothing matches
    def agent
      @agent ||= begin
        ua = @user_agent_string.to_s # a missing header must not crash the lookup
        @@agents.find { |candidate| matches?(candidate, ua) } || {}
      end
    end

    # @param types [Array<Symbol>] type names to look for
    # @return [Boolean, nil] true when the matched agent has at least one of
    #   the given types, false when it has none, nil when no agent matched
    def has_type?(*types)
      return nil if agent.empty?

      types.any? { |type| @types.include?(type) }
    end

    # Browsers, downloaders and proxies are considered human-driven.
    def human?
      has_type? :browser, :downloader, :proxy
    end

    # Checkers, crawlers and spam agents are considered bots.
    def bot?
      has_type? :checker, :crawler, :spam
    end
    alias replicant? bot?

    def browser?
      has_type? :browser
    end

    def checker?
      has_type? :checker
    end

    def downloader?
      has_type? :downloader
    end

    def proxy?
      has_type? :proxy
    end

    def crawler?
      has_type? :crawler
    end

    def spam?
      has_type? :spam
    end

    private

    # Whether one agent-list entry matches the (already stringified) user agent.
    def matches?(candidate, ua)
      pattern = candidate['string'].to_s
      if candidate['string_match'] == 'regex'
        ua =~ Regexp.new(pattern, Regexp::IGNORECASE)
      else
        ua.casecmp(pattern) == 0
      end
    end

    # Loads the shared agent list on first use. A config/user_agents.yml in
    # the host Rails application takes precedence over the copy in the gem.
    def load_agents
      # Skip all path work once the list is cached.
      return @@agents if defined?(@@agents) && @@agents

      rel_path = ['config', 'user_agents.yml']
      path = app_agents_path(rel_path) || VoightKampff.root.join(*rel_path)
      # load_file opens and closes the file itself; an empty file yields a
      # falsy value, which is normalised to an empty list.
      @@agents = YAML.load_file(path.to_s) || []
    end

    # Path of the host application's agent list, or nil when Rails is not
    # loaded, has no root, or the application does not provide the file.
    def app_agents_path(rel_path)
      return nil unless defined?(Rails) && Rails.respond_to?(:root) && Rails.root

      candidate = Rails.root.join(*rel_path)
      File.exist?(candidate.to_s) ? candidate : nil
    end
  end
end
@@ -0,0 +1,54 @@
1
module VoightKampff
  # Downloads an XML list of user agents and converts it into the array of
  # hashes stored in user_agents.yml.
  #
  # To generate a new user_agents.yml file run rake voight_kampff:import_user_agents
  # This class is designed to work with http://www.user-agents.org/allagents.xml
  class UserAgentsParser
    require 'nokogiri'
    require 'httpclient'

    # Fetches the XML document immediately; the body is kept for later parsing.
    #
    # @param url [String] location of the XML agent list
    def initialize(url)
      @url = url
      @xml = HTTPClient.new.get_content(url)
    end

    # Parses the downloaded XML and builds one hash per `user-agent` element.
    # The document is re-parsed on every call.
    #
    # @return [Array<Hash>] entries with 'string', 'string_match', 'types'
    #   and 'description' keys
    def agents
      document = Nokogiri::XML(@xml)
      document.xpath('//user-agent').map do |node|
        type_codes = node.xpath('Type').text.split
        {
          'string' => node.xpath('String').text,
          'string_match' => 'exact', # exact or regex
          'types' => type_codes.map { |code| clarify_type(code) }.compact,
          'description' => node.xpath('Description').text
        }
      end
    end

    # @return [Array<Hash>] same as #agents
    def to_array
      agents
    end
    alias to_a to_array

    # @return [String] the agent list serialised as YAML
    def to_yaml
      agents.to_yaml
    end

    private

    # Expands a one-letter type code (case-insensitive) into its full name.
    # Unknown codes give nil, which the caller drops with `compact`.
    def clarify_type(type)
      case type.downcase
      when 'b' then 'browser'
      when 'c' then 'checker'
      when 'd' then 'downloader'
      when 'p' then 'proxy'
      when 'r' then 'crawler'
      when 's' then 'spam'
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module VoightKampff
  # Version of the gem; matches the version recorded in the gemspec metadata.
  # Frozen so the shared constant cannot be mutated in place.
  #
  # NOTE(review): lib/voight_kampff.rb does not appear to require this file,
  # so VERSION may be undefined after a plain `require 'voight_kampff'` --
  # confirm whether that is intended.
  VERSION = '0.1.0'.freeze
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: voight_kampff
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Adam Crownoble
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-05-11 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: Voight-Kampff is a Ruby gem that detects bots, spiders, crawlers and replicants
22
+ email: adam@obledesign.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - config/user_agents.yml
31
+ - config/initializers/extend_action_dispatch_requset.rb
32
+ - lib/voight_kampff.rb
33
+ - lib/voight_kampff/user_agents_parser.rb
34
+ - lib/voight_kampff/version.rb
35
+ - lib/voight_kampff/test.rb
36
+ - lib/voight_kampff/engine.rb
37
+ homepage: https://github.com/adamcrown/Voight-Kampff
38
+ licenses: []
39
+
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ hash: 3
51
+ segments:
52
+ - 0
53
+ version: "0"
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ hash: 3
60
+ segments:
61
+ - 0
62
+ version: "0"
63
+ requirements: []
64
+
65
+ rubyforge_project:
66
+ rubygems_version: 1.7.2
67
+ signing_key:
68
+ specification_version: 3
69
+ summary: Voight-Kampff bot detection
70
+ test_files: []
71
+
72
+ has_rdoc: