bot_detection 0.9.9 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +14 -0
- data/README.md +4 -0
- data/bot_detection.gemspec +2 -0
- data/lib/bot_detection/bot_user_agents.rb +7 -0
- data/lib/bot_detection/google_user_agents.rb +1 -0
- data/lib/bot_detection/instance_methods.rb +27 -24
- data/lib/bot_detection/search_engine_user_agents.rb +36 -0
- data/lib/bot_detection/version.rb +1 -1
- data/lib/bot_detection.rb +1 -0
- metadata +26 -15
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2d3eb0c816dab6abd7bedf902b7064e55d87b6b3
|
4
|
+
data.tar.gz: 31db977a5485d47cf579822b3e6a46297730ff91
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9dec5f7768f8a9d382c7a6faa045977e649b89e8e05a0f25f90224f97333e65a33edf023960162c91a11e21c0ebc78ec0f145cba22b06f20c8f71687e670f81a
|
7
|
+
data.tar.gz: ce183a9aa5f595fe46eaa1e9a3a7de8972b4a86d22cb66c4c29e3bf591e80f6f3b80790ad12808b8fb739d1a1d0d8732d1cc7d1815b6cd09477a9eeff0816f74
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
#### Release 1.0.1
|
4
|
+
|
5
|
+
- Yandex support added
|
6
|
+
- Baidu support added
|
7
|
+
- Better check for hostnames on reverse lookup
|
8
|
+
- Match against the full user agent for MSN, Bing and Yahoo
|
9
|
+
- All Google bots should now be detected properly
|
10
|
+
- The methods `is_msn?`, `is_bing?` and `is_yahoo?` have been removed
|
11
|
+
|
12
|
+
#### Release 0.9.9
|
13
|
+
|
14
|
+
- Initial version
|
data/README.md
CHANGED
data/bot_detection.gemspec
CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
17
|
spec.require_paths = ["lib"]
|
18
18
|
|
19
|
+
spec.add_runtime_dependency "public_suffix", "~> 1.4.6"
|
20
|
+
|
19
21
|
spec.add_development_dependency "bundler", "~> 1.6"
|
20
22
|
spec.add_development_dependency "rake"
|
21
23
|
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
BotDetection::BotUserAgents = [
|
2
|
+
"FacebookExternalHit/1.1",
|
3
|
+
"FacebookExternalHit/1.0",
|
4
|
+
"facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)",
|
5
|
+
"facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
|
6
|
+
"facebookplatform/1.0 (+http://developers.facebook.com)"
|
7
|
+
]
|
@@ -1,40 +1,39 @@
|
|
1
1
|
require 'bot_detection'
|
2
2
|
require_relative 'google_user_agents.rb'
|
3
|
+
require_relative 'bot_user_agents.rb'
|
4
|
+
require_relative 'search_engine_user_agents.rb'
|
3
5
|
|
4
6
|
module BotDetection::InstanceMethods
|
7
|
+
def is_known_crawler?
|
8
|
+
BotDetection::BotUserAgents.include?(user_agent) || is_search_engine_crawler?(reverse_lookup: false)
|
9
|
+
end
|
10
|
+
|
5
11
|
def is_search_engine_crawler? options = {}
|
6
12
|
remote_ip = options.delete(:ip) || options.delete(:ip_address) || request.remote_ip
|
7
|
-
return false if remote_ip.blank?
|
8
|
-
|
13
|
+
return false if remote_ip.blank?
|
14
|
+
|
15
|
+
reverse_lookup = options.delete(:reverse_lookup)
|
16
|
+
reverse_lookup = true if reverse_lookup.nil?
|
17
|
+
|
18
|
+
return false unless is_known_search_engine_crawler?
|
19
|
+
return true unless reverse_lookup
|
9
20
|
return true if options.delete(:development)
|
10
21
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
found = true and break if host.include?(h)
|
15
|
-
end
|
16
|
-
|
17
|
-
return false unless found
|
22
|
+
host = get_hostname(remote_ip)
|
23
|
+
domain = PublicSuffix.parse(host) rescue nil
|
24
|
+
return false if domain.nil?
|
18
25
|
|
19
|
-
|
20
|
-
|
21
|
-
false
|
26
|
+
return false unless ["crawl.yahoo.net", "googlebot.com", "google.com", "search.msn.com", "ask.com", "yandex.net", "yandex.com", "yandex.ru", "baidu.com", "baidu.jp"].include?(domain.domain.downcase)
|
27
|
+
get_hostip(host) == remote_ip
|
22
28
|
end
|
23
29
|
|
24
30
|
def is_google?
|
25
|
-
BotDetection::GOOGLE_USER_AGENTS.include?(
|
26
|
-
end
|
27
|
-
|
28
|
-
def is_yahoo?
|
29
|
-
request.user_agent.to_s.downcase.include?("yahoo! slurp")
|
30
|
-
end
|
31
|
-
|
32
|
-
def is_msn?
|
33
|
-
request.user_agent.to_s.downcase.include?("msnbot")
|
31
|
+
BotDetection::GOOGLE_USER_AGENTS.include?(user_agent)
|
34
32
|
end
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
|
34
|
+
protected
|
35
|
+
def is_known_search_engine_crawler?
|
36
|
+
is_google? || BotDetection::SearchEngineUserAgents.include?(user_agent)
|
38
37
|
end
|
39
38
|
|
40
39
|
def get_hostname(ip_address)
|
@@ -44,4 +43,8 @@ module BotDetection::InstanceMethods
|
|
44
43
|
def get_hostip(host)
|
45
44
|
Socket.gethostbyname(host).last.unpack("C*").join(".")
|
46
45
|
end
|
46
|
+
|
47
|
+
def user_agent
|
48
|
+
(request.env['HTTP_USER_AGENT'] || request.user_agent).to_s
|
49
|
+
end
|
47
50
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
BotDetection::SearchEngineUserAgents = [
|
2
|
+
"Baiduspider+(+http://www.baidu.com/search/spider.htm)",
|
3
|
+
"Baiduspider+(+http://www.baidu.com/search/spider_jp.html)",
|
4
|
+
"BaiDuSpider",
|
5
|
+
"DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
|
6
|
+
"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
|
7
|
+
"Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
|
8
|
+
"Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)",
|
9
|
+
"Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)",
|
10
|
+
"Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)",
|
11
|
+
"Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)",
|
12
|
+
"Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
|
13
|
+
"Mozilla/5.0 (compatible; YandexPagechecker/1.0; +http://yandex.com/bots)",
|
14
|
+
"Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)",
|
15
|
+
"Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)",
|
16
|
+
"Mozilla/5.0 (compatible; YandexDirect/2.0; Dyatel; +http://yandex.com/bots)",
|
17
|
+
"Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)",
|
18
|
+
"Mozilla/5.0 (compatible; YandexNews/3.0; +http://yandex.com/bots)",
|
19
|
+
"Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots)",
|
20
|
+
"Mozilla/5.0 (compatible; YandexAntivirus/2.0; +http://yandex.com/bots)",
|
21
|
+
"Mozilla/5.0 (compatible; YandexZakladki/3.0; +http://yandex.com/bots)",
|
22
|
+
"Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)",
|
23
|
+
"Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
|
24
|
+
"Mozilla/5.0 (compatible; Yahoo Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
|
25
|
+
"Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
|
26
|
+
"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
|
27
|
+
"Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)",
|
28
|
+
"msnbot/2.1",
|
29
|
+
"msnbot/2.0b",
|
30
|
+
"msnbot/1.1 (+http://search.msn.com/msnbot.htm)",
|
31
|
+
"msnbot/1.1",
|
32
|
+
"msnbot/1.0 (+http://search.msn.com/msnbot.htm)",
|
33
|
+
"msnbot/0.9 (+http://search.msn.com/msnbot.htm)",
|
34
|
+
"msnbot/0.11 ( http://search.msn.com/msnbot.htm)",
|
35
|
+
"MSNBOT/0.1 (http://search.msn.com/msnbot.htm)",
|
36
|
+
]
|
data/lib/bot_detection.rb
CHANGED
metadata
CHANGED
@@ -1,20 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bot_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Nils Berenbold
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-11-13 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: public_suffix
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.4.6
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.4.6
|
14
27
|
- !ruby/object:Gem::Dependency
|
15
28
|
name: bundler
|
16
29
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
30
|
requirements:
|
19
31
|
- - ~>
|
20
32
|
- !ruby/object:Gem::Version
|
@@ -22,7 +34,6 @@ dependencies:
|
|
22
34
|
type: :development
|
23
35
|
prerelease: false
|
24
36
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
37
|
requirements:
|
27
38
|
- - ~>
|
28
39
|
- !ruby/object:Gem::Version
|
@@ -30,17 +41,15 @@ dependencies:
|
|
30
41
|
- !ruby/object:Gem::Dependency
|
31
42
|
name: rake
|
32
43
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
44
|
requirements:
|
35
|
-
- -
|
45
|
+
- - '>='
|
36
46
|
- !ruby/object:Gem::Version
|
37
47
|
version: '0'
|
38
48
|
type: :development
|
39
49
|
prerelease: false
|
40
50
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
51
|
requirements:
|
43
|
-
- -
|
52
|
+
- - '>='
|
44
53
|
- !ruby/object:Gem::Version
|
45
54
|
version: '0'
|
46
55
|
description:
|
@@ -51,38 +60,40 @@ extensions: []
|
|
51
60
|
extra_rdoc_files: []
|
52
61
|
files:
|
53
62
|
- .gitignore
|
63
|
+
- CHANGELOG.md
|
54
64
|
- Gemfile
|
55
65
|
- LICENSE.txt
|
56
66
|
- README.md
|
57
67
|
- Rakefile
|
58
68
|
- bot_detection.gemspec
|
59
69
|
- lib/bot_detection.rb
|
70
|
+
- lib/bot_detection/bot_user_agents.rb
|
60
71
|
- lib/bot_detection/google_user_agents.rb
|
61
72
|
- lib/bot_detection/instance_methods.rb
|
73
|
+
- lib/bot_detection/search_engine_user_agents.rb
|
62
74
|
- lib/bot_detection/version.rb
|
63
75
|
homepage: http://www.nilsberenbold.de
|
64
76
|
licenses:
|
65
77
|
- MIT
|
78
|
+
metadata: {}
|
66
79
|
post_install_message:
|
67
80
|
rdoc_options: []
|
68
81
|
require_paths:
|
69
82
|
- lib
|
70
83
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
84
|
requirements:
|
73
|
-
- -
|
85
|
+
- - '>='
|
74
86
|
- !ruby/object:Gem::Version
|
75
87
|
version: '0'
|
76
88
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
-
none: false
|
78
89
|
requirements:
|
79
|
-
- -
|
90
|
+
- - '>='
|
80
91
|
- !ruby/object:Gem::Version
|
81
92
|
version: '0'
|
82
93
|
requirements: []
|
83
94
|
rubyforge_project:
|
84
|
-
rubygems_version:
|
95
|
+
rubygems_version: 2.0.14
|
85
96
|
signing_key:
|
86
|
-
specification_version:
|
97
|
+
specification_version: 4
|
87
98
|
summary: Detects Search Engine crawlers by reverse DNS lookups.
|
88
99
|
test_files: []
|