legitbot 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -2
- data/Rakefile +7 -0
- data/legitbot.gemspec +3 -0
- data/lib/legitbot/baidu.rb +0 -4
- data/lib/legitbot/bing.rb +0 -4
- data/lib/legitbot/duckduckgo.rb +0 -4
- data/lib/legitbot/facebook.rb +33 -0
- data/lib/legitbot/google.rb +0 -4
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot/yandex.rb +0 -4
- data/lib/legitbot.rb +5 -3
- data/test/facebook_test.rb +22 -0
- metadata +47 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f26aea921fa62377a7a180c49ac426cc42168c98
|
4
|
+
data.tar.gz: fb2fa644e6cf65edf0f068417b63d0a650ceac5f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c909f01468f9a4ea1a8f8cee045d03f1d0736f1868d43172f639379bb05a0b7ba1092ce678465a26495df282fe98b0f0a2542903390b5782b94230a62dff20d
|
7
|
+
data.tar.gz: a8b36a573cda2eaba60cca50cc6a8a28356278257e634b8392e0a3ff719627d73eab9835ae9e3b21a248cd465362c904037c0c0f286533fbd2dabe68ca9532fb
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Legitbot
|
1
|
+
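# Legitbot [![Build Status](https://travis-ci.org/alaz/legitbot.svg?branch=master)](http://travis-ci.org/alaz/legitbot) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)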
|
2
2
|
|
3
3
|
Ruby gem to check if an IP really belongs to some bot, typically a search
|
4
4
|
engine. This can be of much help if one wants to protect his/her web site from
|
@@ -31,10 +31,21 @@ Rack::Attack.blocklist("fake Googlebot") do |req|
|
|
31
31
|
end
|
32
32
|
```
|
33
33
|
|
34
|
+
## Supported
|
35
|
+
|
36
|
+
* [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
37
|
+
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
38
|
+
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
39
|
+
* [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
40
|
+
* [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
|
41
|
+
* [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
|
42
|
+
|
34
43
|
## Issues, problems, plans
|
35
44
|
|
36
45
|
* Rails middleware
|
37
|
-
* Facebook
|
46
|
+
* More testing for Facebook
|
47
|
+
* Review for thread safety
|
48
|
+
* Make it possible to reload Facebook IP ranges
|
38
49
|
|
39
50
|
## License
|
40
51
|
|
data/Rakefile
CHANGED
data/legitbot.gemspec
CHANGED
@@ -15,6 +15,9 @@ Gem::Specification.new do |spec|
|
|
15
15
|
"made by a real search engine, not a fake"
|
16
16
|
|
17
17
|
spec.required_ruby_version = '>= 2.0.0'
|
18
|
+
spec.add_dependency "irrc"
|
19
|
+
spec.add_dependency "segment_tree"
|
20
|
+
spec.add_dependency "concurrent-ruby"
|
18
21
|
spec.add_development_dependency "rake"
|
19
22
|
spec.add_development_dependency "minitest"
|
20
23
|
|
data/lib/legitbot/baidu.rb
CHANGED
data/lib/legitbot/bing.rb
CHANGED
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -3,10 +3,6 @@ module Legitbot
|
|
3
3
|
class DuckDuckGo < BotMatch
|
4
4
|
ValidIPs = %w(72.94.249.34 72.94.249.35 72.94.249.36 72.94.249.37 72.94.249.38)
|
5
5
|
|
6
|
-
def initialize(ip, resolver_config = nil)
|
7
|
-
super(ip, resolver_config)
|
8
|
-
end
|
9
|
-
|
10
6
|
def valid?
|
11
7
|
DuckDuckGo::ValidIPs.include? @ip
|
12
8
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'segment_tree'
|
2
|
+
require 'irrc'
|
3
|
+
require 'concurrent'
|
4
|
+
|
5
|
+
module Legitbot
|
6
|
+
# https://developers.facebook.com/docs/sharing/webmasters/crawler
|
7
|
+
|
8
|
+
class Facebook < BotMatch
|
9
|
+
AS = 'AS32934'
|
10
|
+
ValidIPs = Concurrent::Delay.new do
|
11
|
+
client = Irrc::Client.new
|
12
|
+
client.query :radb, 'AS32934'
|
13
|
+
results = client.perform
|
14
|
+
|
15
|
+
Hash[%i(ipv4 ipv6).map { |k|
|
16
|
+
[k, SegmentTree.new(results[AS][k][AS].map { |cidr|
|
17
|
+
[IPAddr.new(cidr).to_range, true]
|
18
|
+
})]
|
19
|
+
}]
|
20
|
+
end
|
21
|
+
|
22
|
+
def valid?
|
23
|
+
ip = IPAddr.new(@ip)
|
24
|
+
if ip.ipv4?
|
25
|
+
ValidIPs.value[:ipv4].find(ip)
|
26
|
+
else
|
27
|
+
ValidIPs.value[:ipv6].find(ip)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
rule Legitbot::Facebook, %w(facebookhit facebookexternalhit)
|
33
|
+
end
|
data/lib/legitbot/google.rb
CHANGED
@@ -7,10 +7,6 @@ module Legitbot
|
|
7
7
|
class Google < BotMatch
|
8
8
|
ValidDomains = ["google.com.", "googlebot.com."]
|
9
9
|
|
10
|
-
def initialize(ip, resolver_config = nil)
|
11
|
-
super(ip, resolver_config)
|
12
|
-
end
|
13
|
-
|
14
10
|
def valid?
|
15
11
|
subdomain_of?(*Google::ValidDomains) && reverse_resolves?
|
16
12
|
end
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot/yandex.rb
CHANGED
@@ -4,10 +4,6 @@ module Legitbot
|
|
4
4
|
class Yandex < BotMatch
|
5
5
|
ValidDomains = ["yandex.ru.", "yandex.net.", "yandex.com."]
|
6
6
|
|
7
|
-
def initialize(ip, resolver_config = nil)
|
8
|
-
super(ip, resolver_config)
|
9
|
-
end
|
10
|
-
|
11
7
|
def valid?
|
12
8
|
subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
|
13
9
|
end
|
data/lib/legitbot.rb
CHANGED
@@ -2,8 +2,10 @@ require 'resolv'
|
|
2
2
|
|
3
3
|
require_relative 'legitbot/legitbot'
|
4
4
|
require_relative 'legitbot/botmatch'
|
5
|
-
|
6
|
-
require_relative 'legitbot/yandex'
|
7
|
-
require_relative 'legitbot/bing'
|
5
|
+
|
8
6
|
require_relative 'legitbot/baidu'
|
7
|
+
require_relative 'legitbot/bing'
|
9
8
|
require_relative 'legitbot/duckduckgo'
|
9
|
+
require_relative 'legitbot/facebook'
|
10
|
+
require_relative 'legitbot/google'
|
11
|
+
require_relative 'legitbot/yandex'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'legitbot'
|
3
|
+
|
4
|
+
class FacebookTest < Minitest::Test
|
5
|
+
def test_valid_ip
|
6
|
+
ip = "69.63.186.89"
|
7
|
+
match = Legitbot::Facebook.new(ip)
|
8
|
+
assert match.valid?, msg: "#{ip} is a valid Facebook IP"
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_invalid_ip
|
12
|
+
ip = "127.0.0.1"
|
13
|
+
match = Legitbot::Facebook.new(ip)
|
14
|
+
assert match.fake?, msg: "#{ip} is a fake Facebook IP"
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_user_agent
|
18
|
+
bot = Legitbot.bot("facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "31.13.76.56")
|
19
|
+
assert_equal "Facebook", bot.detected_as
|
20
|
+
assert bot.valid?, msg: "A valid Facebook User-agent and IP"
|
21
|
+
end
|
22
|
+
end
|
metadata
CHANGED
@@ -1,15 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: irrc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: segment_tree
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: concurrent-ruby
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
13
55
|
- !ruby/object:Gem::Dependency
|
14
56
|
name: rake
|
15
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -57,11 +99,13 @@ files:
|
|
57
99
|
- lib/legitbot/bing.rb
|
58
100
|
- lib/legitbot/botmatch.rb
|
59
101
|
- lib/legitbot/duckduckgo.rb
|
102
|
+
- lib/legitbot/facebook.rb
|
60
103
|
- lib/legitbot/google.rb
|
61
104
|
- lib/legitbot/legitbot.rb
|
62
105
|
- lib/legitbot/version.rb
|
63
106
|
- lib/legitbot/yandex.rb
|
64
107
|
- test/botmatch_test.rb
|
108
|
+
- test/facebook_test.rb
|
65
109
|
- test/google_test.rb
|
66
110
|
- test/legitbot_test.rb
|
67
111
|
homepage: https://github.com/alaz/legitbot
|
@@ -91,5 +135,6 @@ specification_version: 4
|
|
91
135
|
summary: Validate Web request was made by legitimate search engine
|
92
136
|
test_files:
|
93
137
|
- test/botmatch_test.rb
|
138
|
+
- test/facebook_test.rb
|
94
139
|
- test/google_test.rb
|
95
140
|
- test/legitbot_test.rb
|