bot_detection 0.9.9 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +14 -0
- data/README.md +4 -0
- data/bot_detection.gemspec +2 -0
- data/lib/bot_detection/bot_user_agents.rb +7 -0
- data/lib/bot_detection/google_user_agents.rb +1 -0
- data/lib/bot_detection/instance_methods.rb +27 -24
- data/lib/bot_detection/search_engine_user_agents.rb +36 -0
- data/lib/bot_detection/version.rb +1 -1
- data/lib/bot_detection.rb +1 -0
- metadata +26 -15
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 2d3eb0c816dab6abd7bedf902b7064e55d87b6b3
+  data.tar.gz: 31db977a5485d47cf579822b3e6a46297730ff91
+SHA512:
+  metadata.gz: 9dec5f7768f8a9d382c7a6faa045977e649b89e8e05a0f25f90224f97333e65a33edf023960162c91a11e21c0ebc78ec0f145cba22b06f20c8f71687e670f81a
+  data.tar.gz: ce183a9aa5f595fe46eaa1e9a3a7de8972b4a86d22cb66c4c29e3bf591e80f6f3b80790ad12808b8fb739d1a1d0d8732d1cc7d1815b6cd09477a9eeff0816f74
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
+# Changelog
+
+#### Release 1.0.1
+
+- Yandex support added
+- Baidu support added
+- better check for hostnames on reverse lookup
+- match for full user agent on msn, bing and yahoo
+- it should now properly detect all google bots
+- the methods is_msn?, is_bing? and is_yahoo? have been removed
+
+#### Release 0.9.9
+
+- Initial version
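
In practice the 1.0.1 API centers on `is_known_crawler?` and `is_search_engine_crawler?` in place of the removed per-engine helpers. A hedged usage sketch, assuming `BotDetection::InstanceMethods` is mixed into a Rails-style controller that exposes `request` (controller, action, and layout names here are illustrative, not from the gem):

```ruby
class PagesController < ApplicationController
  include BotDetection::InstanceMethods

  def show
    if is_search_engine_crawler?      # full UA match plus forward-confirmed reverse DNS
      render :show, layout: "crawler"
    elsif is_known_crawler?           # UA-only check (Facebook bots etc.), no DNS lookups
      head :ok
    else
      render :show
    end
  end
end
```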
data/README.md
CHANGED
data/bot_detection.gemspec
CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
 
+  spec.add_runtime_dependency "public_suffix", "~> 1.4.6"
+
   spec.add_development_dependency "bundler", "~> 1.6"
   spec.add_development_dependency "rake"
 end
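
The new public_suffix runtime dependency is what lets the reverse-lookup code reduce a resolved hostname to its registrable domain before comparing it against the crawler domain list. A minimal sketch of that step (the hostname is an illustrative example, not taken from the gem):

```ruby
require 'public_suffix'

host   = "crawl-66-249-66-1.googlebot.com"   # illustrative reverse-DNS result
domain = PublicSuffix.parse(host) rescue nil
domain.domain.downcase if domain             # => "googlebot.com"
```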
data/lib/bot_detection/bot_user_agents.rb
ADDED
@@ -0,0 +1,7 @@
+BotDetection::BotUserAgents = [
+  "FacebookExternalHit/1.1",
+  "FacebookExternalHit/1.0",
+  "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)",
+  "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
+  "facebookplatform/1.0 (+http://developers.facebook.com)"
+]
data/lib/bot_detection/instance_methods.rb
CHANGED
@@ -1,40 +1,39 @@
 require 'bot_detection'
 require_relative 'google_user_agents.rb'
+require_relative 'bot_user_agents.rb'
+require_relative 'search_engine_user_agents.rb'
 
 module BotDetection::InstanceMethods
+  def is_known_crawler?
+    BotDetection::BotUserAgents.include?(user_agent) || is_search_engine_crawler?(reverse_lookup: false)
+  end
+
   def is_search_engine_crawler? options = {}
     remote_ip = options.delete(:ip) || options.delete(:ip_address) || request.remote_ip
-    return false if remote_ip.blank?
-
+    return false if remote_ip.blank?
+
+    reverse_lookup = options.delete(:reverse_lookup)
+    reverse_lookup = true if reverse_lookup.nil?
+
+    return false unless is_known_search_engine_crawler?
+    return true unless reverse_lookup
     return true if options.delete(:development)
 
-
-
-
-      found = true and break if host.include?(h)
-    end
-
-    return false unless found
+    host = get_hostname(remote_ip)
+    domain = PublicSuffix.parse(host) rescue nil
+    return false if domain.nil?
 
-
-
-    false
+    return false unless ["crawl.yahoo.net", "googlebot.com", "google.com", "search.msn.com", "ask.com", "yandex.net", "yandex.com", "yandex.ru", "baidu.com", "baidu.jp"].include?(domain.domain.downcase)
+    get_hostip(host) == remote_ip
   end
 
   def is_google?
-    BotDetection::GOOGLE_USER_AGENTS.include?(
-  end
-
-  def is_yahoo?
-    request.user_agent.to_s.downcase.include?("yahoo! slurp")
-  end
-
-  def is_msn?
-    request.user_agent.to_s.downcase.include?("msnbot")
+    BotDetection::GOOGLE_USER_AGENTS.include?(user_agent)
   end
-
-
-
+
+  protected
+  def is_known_search_engine_crawler?
+    is_google? || BotDetection::SearchEngineUserAgents.include?(user_agent)
+  end
 
   def get_hostname(ip_address)
@@ -44,4 +43,8 @@ module BotDetection::InstanceMethods
   def get_hostip(host)
     Socket.gethostbyname(host).last.unpack("C*").join(".")
   end
+
+  def user_agent
+    (request.env['HTTP_USER_AGENT'] || request.user_agent).to_s
+  end
 end
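
For readers skimming the diff, the rewritten `is_search_engine_crawler?` is a forward-confirmed reverse DNS check: resolve the remote IP to a hostname, verify that its registrable domain belongs to a known search engine, then resolve the hostname back and require it to match the original IP. A standalone sketch of the same pattern, using Resolv instead of the gem's Socket calls and an illustrative IP (not code from the gem):

```ruby
require 'resolv'
require 'public_suffix'

# Domains the gem whitelists (taken from the diff above).
CRAWLER_DOMAINS = %w[crawl.yahoo.net googlebot.com google.com search.msn.com
                     ask.com yandex.net yandex.com yandex.ru baidu.com baidu.jp]

# Forward-confirmed reverse DNS: IP -> hostname, check the registrable domain,
# then hostname -> IP and require it to round-trip to the same address.
def verified_crawler_ip?(remote_ip)
  host   = Resolv.getname(remote_ip)                  # reverse lookup
  domain = PublicSuffix.parse(host) rescue nil
  return false unless domain && CRAWLER_DOMAINS.include?(domain.domain.downcase)

  Resolv.getaddress(host) == remote_ip                # forward confirmation
rescue Resolv::ResolvError
  false
end

verified_crawler_ip?("66.249.66.1")   # illustrative Googlebot-range address
```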
data/lib/bot_detection/search_engine_user_agents.rb
ADDED
@@ -0,0 +1,36 @@
+BotDetection::SearchEngineUserAgents = [
+  "Baiduspider+(+http://www.baidu.com/search/spider.htm)",
+  "Baiduspider+(+http://www.baidu.com/search/spider_jp.html)",
+  "BaiDuSpider",
+  "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
+  "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexPagechecker/1.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexDirect/2.0; Dyatel; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexNews/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexAntivirus/2.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexZakladki/3.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)",
+  "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+  "Mozilla/5.0 (compatible; Yahoo Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+  "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
+  "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+  "Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)",
+  "msnbot/2.1",
+  "msnbot/2.0b",
+  "msnbot/1.1 (+http://search.msn.com/msnbot.htm)",
+  "msnbot/1.1",
+  "msnbot/1.0 (+http://search.msn.com/msnbot.htm)",
+  "msnbot/0.9 (+http://search.msn.com/msnbot.htm)",
+  "msnbot/0.11 ( http://search.msn.com/msnbot.htm)",
+  "MSNBOT/0.1 (http://search.msn.com/msnbot.htm)",
+]
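
Note that `is_known_search_engine_crawler?` tests membership in this array with `Array#include?`, i.e. an exact, case-sensitive match against the full user agent string, which is what the changelog means by "match for full user agent on msn, bing and yahoo". A quick sketch of the difference from the removed substring-based helpers (assuming the constant above is loaded):

```ruby
ua = "msnbot/2.1"
BotDetection::SearchEngineUserAgents.include?(ua)             # => true  (exact match)
BotDetection::SearchEngineUserAgents.include?("#{ua} (beta)") # => false (no substring matching)
ua.downcase.include?("msnbot")                                # => true  (old is_msn?-style check)
```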
data/lib/bot_detection.rb
CHANGED
metadata
CHANGED
@@ -1,20 +1,32 @@
 --- !ruby/object:Gem::Specification
 name: bot_detection
 version: !ruby/object:Gem::Version
-  version: 0.9.9
-  prerelease:
+  version: 1.0.1
 platform: ruby
 authors:
 - Nils Berenbold
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-
+date: 2014-11-13 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: public_suffix
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.4.6
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.4.6
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -22,7 +34,6 @@ dependencies:
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -30,17 +41,15 @@ dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - -
+    - - '>='
       - !ruby/object:Gem::Version
         version: '0'
 description:
@@ -51,38 +60,40 @@ extensions: []
 extra_rdoc_files: []
 files:
 - .gitignore
+- CHANGELOG.md
 - Gemfile
 - LICENSE.txt
 - README.md
 - Rakefile
 - bot_detection.gemspec
 - lib/bot_detection.rb
+- lib/bot_detection/bot_user_agents.rb
 - lib/bot_detection/google_user_agents.rb
 - lib/bot_detection/instance_methods.rb
+- lib/bot_detection/search_engine_user_agents.rb
 - lib/bot_detection/version.rb
 homepage: http://www.nilsberenbold.de
 licenses:
 - MIT
+metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - -
+  - - '>='
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - -
+  - - '>='
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version:
+rubygems_version: 2.0.14
 signing_key:
-specification_version:
+specification_version: 4
 summary: Detects Search Engine crawlers by reverse DNS lookups.
 test_files: []