bot_detection 0.9.9 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2d3eb0c816dab6abd7bedf902b7064e55d87b6b3
4
+ data.tar.gz: 31db977a5485d47cf579822b3e6a46297730ff91
5
+ SHA512:
6
+ metadata.gz: 9dec5f7768f8a9d382c7a6faa045977e649b89e8e05a0f25f90224f97333e65a33edf023960162c91a11e21c0ebc78ec0f145cba22b06f20c8f71687e670f81a
7
+ data.tar.gz: ce183a9aa5f595fe46eaa1e9a3a7de8972b4a86d22cb66c4c29e3bf591e80f6f3b80790ad12808b8fb739d1a1d0d8732d1cc7d1815b6cd09477a9eeff0816f74
data/CHANGELOG.md ADDED
@@ -0,0 +1,14 @@
1
+ # Changelog
2
+
3
+ #### Release 1.0.1
4
+
5
+ - Yandex support added
6
+ - Baidu support added
7
+ Better check for hostnames on reverse lookup
8
+ Match for full user agent on MSN, Bing and Yahoo
9
+ It should now properly detect all Google bots
10
+ The methods is_msn?, is_bing? and is_yahoo? have been removed
11
+
12
+ #### Release 0.9.9
13
+
14
+ - Initial version
data/README.md CHANGED
@@ -16,6 +16,10 @@ Or install it yourself as:
16
16
 
17
17
  $ gem install bot_detection
18
18
 
19
+ ## Changes
20
+
21
+ See the [CHANGELOG.md](CHANGELOG.md) file for details.
22
+
19
23
  ## Usage
20
24
 
21
25
  TODO: Write usage instructions here
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
16
16
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
17
  spec.require_paths = ["lib"]
18
18
 
19
+ spec.add_runtime_dependency "public_suffix", "~> 1.4.6"
20
+
19
21
  spec.add_development_dependency "bundler", "~> 1.6"
20
22
  spec.add_development_dependency "rake"
21
23
  end
@@ -0,0 +1,7 @@
1
+ BotDetection::BotUserAgents = [
2
+ "FacebookExternalHit/1.1",
3
+ "FacebookExternalHit/1.0",
4
+ "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)",
5
+ "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
6
+ "facebookplatform/1.0 (+http://developers.facebook.com)"
7
+ ]
@@ -1,5 +1,6 @@
1
1
  BotDetection::GOOGLE_USER_AGENTS = [
2
2
  "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
3
+ "Googlebot/2.1 (+http://www.googlebot.com/bot.html)",
3
4
  "Googlebot/2.1 (+http://www.google.com/bot.html)",
4
5
  "Googlebot-News",
5
6
  "Googlebot-Image/1.0",
@@ -1,40 +1,39 @@
1
1
  require 'bot_detection'
2
2
  require_relative 'google_user_agents.rb'
3
+ require_relative 'bot_user_agents.rb'
4
+ require_relative 'search_engine_user_agents.rb'
3
5
 
4
6
  module BotDetection::InstanceMethods
7
+ def is_known_crawler?
8
+ BotDetection::BotUserAgents.include?(user_agent) || is_search_engine_crawler?(reverse_lookup: false)
9
+ end
10
+
5
11
  def is_search_engine_crawler? options = {}
6
12
  remote_ip = options.delete(:ip) || options.delete(:ip_address) || request.remote_ip
7
- return false if remote_ip.blank? || (!is_google? && !is_yahoo? && !is_msn? && !is_bing?)
8
-
13
+ return false if remote_ip.blank?
14
+
15
+ reverse_lookup = options.delete(:reverse_lookup)
16
+ reverse_lookup = true if reverse_lookup.nil?
17
+
18
+ return false unless is_known_search_engine_crawler?
19
+ return true unless reverse_lookup
9
20
  return true if options.delete(:development)
10
21
 
11
- found = false
12
- host = get_hostname(remote_ip)
13
- ["crawl.yahoo.net", "googlebot", "search.msn.com", "ask.com"].each do |h|
14
- found = true and break if host.include?(h)
15
- end
16
-
17
- return false unless found
22
+ host = get_hostname(remote_ip)
23
+ domain = PublicSuffix.parse(host) rescue nil
24
+ return false if domain.nil?
18
25
 
19
- host_ip = get_hostip(host)
20
- return host_ip == remote_ip
21
- false
26
+ return false unless ["crawl.yahoo.net", "googlebot.com", "google.com", "search.msn.com", "ask.com", "yandex.net", "yandex.com", "yandex.ru", "baidu.com", "baidu.jp"].include?(domain.domain.downcase)
27
+ get_hostip(host) == remote_ip
22
28
  end
23
29
 
24
30
  def is_google?
25
- BotDetection::GOOGLE_USER_AGENTS.include?(request.env['HTTP_USER_AGENT']) || request.user_agent.to_s.downcase.include?("googlebot")
26
- end
27
-
28
- def is_yahoo?
29
- request.user_agent.to_s.downcase.include?("yahoo! slurp")
30
- end
31
-
32
- def is_msn?
33
- request.user_agent.to_s.downcase.include?("msnbot")
31
+ BotDetection::GOOGLE_USER_AGENTS.include?(user_agent)
34
32
  end
35
-
36
- def is_bing?
37
- request.user_agent.to_s.downcase.include?("bing")
33
+
34
+ protected
35
+ def is_known_search_engine_crawler?
36
+ is_google? || BotDetection::SearchEngineUserAgents.include?(user_agent)
38
37
  end
39
38
 
40
39
  def get_hostname(ip_address)
@@ -44,4 +43,8 @@ module BotDetection::InstanceMethods
44
43
  def get_hostip(host)
45
44
  Socket.gethostbyname(host).last.unpack("C*").join(".")
46
45
  end
46
+
47
+ def user_agent
48
+ (request.env['HTTP_USER_AGENT'] || request.user_agent).to_s
49
+ end
47
50
  end
@@ -0,0 +1,36 @@
1
+ BotDetection::SearchEngineUserAgents = [
2
+ "Baiduspider+(+http://www.baidu.com/search/spider.htm)",
3
+ "Baiduspider+(+http://www.baidu.com/search/spider_jp.html)",
4
+ "BaiDuSpider",
5
+ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
6
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
7
+ "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
8
+ "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)",
9
+ "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)",
10
+ "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)",
11
+ "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)",
12
+ "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
13
+ "Mozilla/5.0 (compatible; YandexPagechecker/1.0; +http://yandex.com/bots)",
14
+ "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)",
15
+ "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)",
16
+ "Mozilla/5.0 (compatible; YandexDirect/2.0; Dyatel; +http://yandex.com/bots)",
17
+ "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)",
18
+ "Mozilla/5.0 (compatible; YandexNews/3.0; +http://yandex.com/bots)",
19
+ "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots)",
20
+ "Mozilla/5.0 (compatible; YandexAntivirus/2.0; +http://yandex.com/bots)",
21
+ "Mozilla/5.0 (compatible; YandexZakladki/3.0; +http://yandex.com/bots)",
22
+ "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)",
23
+ "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
24
+ "Mozilla/5.0 (compatible; Yahoo Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
25
+ "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
26
+ "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
27
+ "Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)",
28
+ "msnbot/2.1",
29
+ "msnbot/2.0b",
30
+ "msnbot/1.1 (+http://search.msn.com/msnbot.htm)",
31
+ "msnbot/1.1",
32
+ "msnbot/1.0 (+http://search.msn.com/msnbot.htm)",
33
+ "msnbot/0.9 (+http://search.msn.com/msnbot.htm)",
34
+ "msnbot/0.11 ( http://search.msn.com/msnbot.htm)",
35
+ "MSNBOT/0.1 (http://search.msn.com/msnbot.htm)",
36
+ ]
@@ -1,3 +1,3 @@
1
1
  module BotDetection
2
- VERSION = "0.9.9"
2
+ VERSION = "1.0.1"
3
3
  end
data/lib/bot_detection.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "bot_detection/version"
2
+ require "public_suffix"
2
3
 
3
4
  module BotDetection
4
5
  def self.included(target)
metadata CHANGED
@@ -1,20 +1,32 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bot_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.9
5
- prerelease:
4
+ version: 1.0.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - Nils Berenbold
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-10-07 00:00:00.000000000 Z
11
+ date: 2014-11-13 00:00:00.000000000 Z
13
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: public_suffix
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.6
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.4.6
14
27
  - !ruby/object:Gem::Dependency
15
28
  name: bundler
16
29
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
30
  requirements:
19
31
  - - ~>
20
32
  - !ruby/object:Gem::Version
@@ -22,7 +34,6 @@ dependencies:
22
34
  type: :development
23
35
  prerelease: false
24
36
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
37
  requirements:
27
38
  - - ~>
28
39
  - !ruby/object:Gem::Version
@@ -30,17 +41,15 @@ dependencies:
30
41
  - !ruby/object:Gem::Dependency
31
42
  name: rake
32
43
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
44
  requirements:
35
- - - ! '>='
45
+ - - '>='
36
46
  - !ruby/object:Gem::Version
37
47
  version: '0'
38
48
  type: :development
39
49
  prerelease: false
40
50
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
51
  requirements:
43
- - - ! '>='
52
+ - - '>='
44
53
  - !ruby/object:Gem::Version
45
54
  version: '0'
46
55
  description:
@@ -51,38 +60,40 @@ extensions: []
51
60
  extra_rdoc_files: []
52
61
  files:
53
62
  - .gitignore
63
+ - CHANGELOG.md
54
64
  - Gemfile
55
65
  - LICENSE.txt
56
66
  - README.md
57
67
  - Rakefile
58
68
  - bot_detection.gemspec
59
69
  - lib/bot_detection.rb
70
+ - lib/bot_detection/bot_user_agents.rb
60
71
  - lib/bot_detection/google_user_agents.rb
61
72
  - lib/bot_detection/instance_methods.rb
73
+ - lib/bot_detection/search_engine_user_agents.rb
62
74
  - lib/bot_detection/version.rb
63
75
  homepage: http://www.nilsberenbold.de
64
76
  licenses:
65
77
  - MIT
78
+ metadata: {}
66
79
  post_install_message:
67
80
  rdoc_options: []
68
81
  require_paths:
69
82
  - lib
70
83
  required_ruby_version: !ruby/object:Gem::Requirement
71
- none: false
72
84
  requirements:
73
- - - ! '>='
85
+ - - '>='
74
86
  - !ruby/object:Gem::Version
75
87
  version: '0'
76
88
  required_rubygems_version: !ruby/object:Gem::Requirement
77
- none: false
78
89
  requirements:
79
- - - ! '>='
90
+ - - '>='
80
91
  - !ruby/object:Gem::Version
81
92
  version: '0'
82
93
  requirements: []
83
94
  rubyforge_project:
84
- rubygems_version: 1.8.24
95
+ rubygems_version: 2.0.14
85
96
  signing_key:
86
- specification_version: 3
97
+ specification_version: 4
87
98
  summary: Detects Search Engine crawlers by reverse DNS lookups.
88
99
  test_files: []