legitbot 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +6 -4
- data/legitbot.gemspec +0 -1
- data/lib/legitbot.rb +2 -2
- data/lib/legitbot/apple.rb +16 -0
- data/lib/legitbot/botmatch.rb +3 -0
- data/lib/legitbot/facebook.rb +6 -7
- data/lib/legitbot/google.rb +0 -2
- data/lib/legitbot/pinterest.rb +13 -0
- data/lib/legitbot/version.rb +1 -1
- data/test/apple_test.rb +22 -0
- data/test/botmatch_test.rb +6 -6
- data/test/google_test.rb +3 -3
- data/test/pinterest_test.rb +37 -0
- metadata +11 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9624f762d4c29b00c2baf6f53ec6f6dacaec29d4
|
4
|
+
data.tar.gz: e6c2650c5471cb9075e9450fd8a5894f074ee05b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dfa1e4d4ec277f2785e49f5af55a630aed08271fa4c7dd24d93d2155d8da8f9522947169a392a2111d6519ab74a0fa3d6410f184f7f67e067403c421ab56ecc7
|
7
|
+
data.tar.gz: f3e765ee7215424adc1a91e990734bc8409634e321a0c2c37dab83e426c438be7134f432cf0424097489d7ce2397fd4a56711da3349f84cbef83ef26aace1f22
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -33,19 +33,21 @@ end
|
|
33
33
|
|
34
34
|
## Supported
|
35
35
|
|
36
|
-
* [
|
37
|
-
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
38
|
-
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
36
|
+
* [Applebot](https://support.apple.com/en-us/HT204683)
|
39
37
|
* [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
38
|
+
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
40
39
|
* [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
|
41
40
|
* [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
|
41
|
+
* [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
42
|
+
* [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
|
43
|
+
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
42
44
|
|
43
45
|
## Issues, problems, plans
|
44
46
|
|
45
47
|
* Rails middleware
|
46
48
|
* More testing for Facebook
|
47
|
-
* Review for thread safety
|
48
49
|
* Make it possible to reload Facebook IP ranges
|
50
|
+
* Bots masquerading as someone else, e.g. `Telegram (like Twitter)` - what to do?
|
49
51
|
|
50
52
|
## License
|
51
53
|
|
data/legitbot.gemspec
CHANGED
@@ -17,7 +17,6 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.required_ruby_version = '>= 2.0.0'
|
18
18
|
spec.add_dependency "irrc"
|
19
19
|
spec.add_dependency "segment_tree"
|
20
|
-
spec.add_dependency "concurrent-ruby"
|
21
20
|
spec.add_development_dependency "rake"
|
22
21
|
spec.add_development_dependency "minitest"
|
23
22
|
|
data/lib/legitbot.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
require 'resolv'
|
2
|
-
|
3
1
|
require_relative 'legitbot/legitbot'
|
4
2
|
require_relative 'legitbot/botmatch'
|
5
3
|
|
4
|
+
require_relative 'legitbot/apple'
|
6
5
|
require_relative 'legitbot/baidu'
|
7
6
|
require_relative 'legitbot/bing'
|
8
7
|
require_relative 'legitbot/duckduckgo'
|
9
8
|
require_relative 'legitbot/facebook'
|
10
9
|
require_relative 'legitbot/google'
|
10
|
+
require_relative 'legitbot/pinterest'
|
11
11
|
require_relative 'legitbot/yandex'
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'ipaddr'
|
2
|
+
|
3
|
+
module Legitbot
|
4
|
+
# https://support.apple.com/en-us/HT204683
|
5
|
+
|
6
|
+
class Apple < BotMatch
|
7
|
+
Range = IPAddr.new('17.0.0.0/8')
|
8
|
+
|
9
|
+
def valid?
|
10
|
+
ip = IPAddr.new @ip
|
11
|
+
Range.include? ip
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
rule Legitbot::Apple, %w(Applebot)
|
16
|
+
end
|
data/lib/legitbot/botmatch.rb
CHANGED
data/lib/legitbot/facebook.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
require 'segment_tree'
|
2
2
|
require 'irrc'
|
3
|
-
require '
|
3
|
+
require 'monitor'
|
4
|
+
require 'ipaddr'
|
4
5
|
|
5
6
|
module Legitbot
|
6
7
|
# https://developers.facebook.com/docs/sharing/webmasters/crawler
|
7
8
|
|
8
9
|
class Facebook < BotMatch
|
10
|
+
lock = Monitor.new
|
11
|
+
|
9
12
|
AS = 'AS32934'
|
10
|
-
ValidIPs =
|
13
|
+
ValidIPs = lock.synchronize do
|
11
14
|
client = Irrc::Client.new
|
12
15
|
client.query :radb, 'AS32934'
|
13
16
|
results = client.perform
|
@@ -21,11 +24,7 @@ module Legitbot
|
|
21
24
|
|
22
25
|
def valid?
|
23
26
|
ip = IPAddr.new(@ip)
|
24
|
-
|
25
|
-
ValidIPs.value[:ipv4].find(ip)
|
26
|
-
else
|
27
|
-
ValidIPs.value[:ipv6].find(ip)
|
28
|
-
end
|
27
|
+
ValidIPs[ip.ipv4? ? :ipv4 : :ipv6].find(ip)
|
29
28
|
end
|
30
29
|
end
|
31
30
|
|
data/lib/legitbot/google.rb
CHANGED
@@ -0,0 +1,13 @@
|
|
1
|
+
module Legitbot
|
2
|
+
# https://help.pinterest.com/en/articles/about-pinterest-crawler-0
|
3
|
+
|
4
|
+
class Pinterest < BotMatch
|
5
|
+
ValidDomains = ["pinterest.com."]
|
6
|
+
|
7
|
+
def valid?
|
8
|
+
subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
rule Legitbot::Pinterest, %w(Pinterestbot Pinterest)
|
13
|
+
end
|
data/lib/legitbot/version.rb
CHANGED
data/test/apple_test.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'legitbot'
|
3
|
+
|
4
|
+
class AppleTest < Minitest::Test
|
5
|
+
def test_valid_ip
|
6
|
+
ip = "17.58.98.60"
|
7
|
+
match = Legitbot::Apple.new(ip)
|
8
|
+
assert match.valid?, msg: "#{ip} is a valid Applebot IP"
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_invalid_ip
|
12
|
+
ip = "127.0.0.1"
|
13
|
+
match = Legitbot::Apple.new(ip)
|
14
|
+
assert match.fake?, msg: "#{ip} is a fake Applebot IP"
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_user_agent
|
18
|
+
bot = Legitbot.bot("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)", "17.58.98.60")
|
19
|
+
assert_equal "Apple", bot.detected_as
|
20
|
+
assert bot.valid?, msg: "A valid Applebot User-agent and IP"
|
21
|
+
end
|
22
|
+
end
|
data/test/botmatch_test.rb
CHANGED
@@ -3,22 +3,22 @@ require 'legitbot'
|
|
3
3
|
|
4
4
|
class BotMatchTest < Minitest::Test
|
5
5
|
def test_reverse_name
|
6
|
-
match = Legitbot::BotMatch.new "66.249.
|
7
|
-
assert_equal "crawl-66-249-
|
6
|
+
match = Legitbot::BotMatch.new "66.249.64.141"
|
7
|
+
assert_equal "crawl-66-249-64-141.googlebot.com", match.reverse_name
|
8
8
|
end
|
9
9
|
|
10
10
|
def test_reverse_ip
|
11
|
-
match = Legitbot::BotMatch.new "66.249.
|
12
|
-
assert_equal "66.249.
|
11
|
+
match = Legitbot::BotMatch.new "66.249.64.141"
|
12
|
+
assert_equal "66.249.64.141", match.reversed_ip
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_reverse_resolves
|
16
|
-
match = Legitbot::BotMatch.new "66.249.
|
16
|
+
match = Legitbot::BotMatch.new "66.249.64.141"
|
17
17
|
assert_equal true, match.reverse_resolves?
|
18
18
|
end
|
19
19
|
|
20
20
|
def test_valid_class_syntax
|
21
|
-
assert Legitbot::Google.valid?("66.249.
|
21
|
+
assert Legitbot::Google.valid?("66.249.64.141"), msg: "Valid Googlebot"
|
22
22
|
assert Legitbot::Google.fake?("149.210.164.47"), msg: "Fake Googlebot"
|
23
23
|
end
|
24
24
|
end
|
data/test/google_test.rb
CHANGED
@@ -11,7 +11,7 @@ class GoogleTest < Minitest::Test
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_valid_ip
|
14
|
-
ip = "66.249.
|
14
|
+
ip = "66.249.64.141"
|
15
15
|
match = Legitbot::Google.new ip
|
16
16
|
reverse_name = match.reverse_name
|
17
17
|
assert match.subdomain_of?("googlebot.com."), msg: "#{reverse_name} is a subdomain of googlebot.com"
|
@@ -25,13 +25,13 @@ class GoogleTest < Minitest::Test
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def test_valid_ua
|
28
|
-
bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.
|
28
|
+
bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.64.141")
|
29
29
|
assert bot, msg: "Googlebot detected from User-Agent"
|
30
30
|
assert bot.valid?, msg: "Valid Googlebot"
|
31
31
|
end
|
32
32
|
|
33
33
|
def test_engine_name
|
34
|
-
bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.
|
34
|
+
bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.64.141")
|
35
35
|
assert_equal "Google", bot.detected_as
|
36
36
|
end
|
37
37
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'legitbot'
|
3
|
+
|
4
|
+
class PinterestTest < Minitest::Test
|
5
|
+
def test_malicious_ip
|
6
|
+
ip = "149.210.164.47"
|
7
|
+
match = Legitbot::Pinterest.new ip
|
8
|
+
reverse_name = match.reverse_name
|
9
|
+
assert !match.subdomain_of?("pinterest.com."), msg: "#{reverse_name} is not a subdomain of pinterest.com"
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Pinterest IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = "54.236.1.11"
|
15
|
+
match = Legitbot::Pinterest.new ip
|
16
|
+
reverse_name = match.reverse_name
|
17
|
+
assert match.subdomain_of?("pinterest.com."), msg: "#{reverse_name} is a subdomain of pinterest.com"
|
18
|
+
assert match.valid?, msg: "#{ip} is a valid Pinterest IP"
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_malicious_ua
|
22
|
+
bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "149.210.164.47")
|
23
|
+
assert bot, msg: "Pinterest detected from User-Agent"
|
24
|
+
assert !bot.valid?, msg: "Not a valid Pinterest"
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_valid_ua
|
28
|
+
bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "54.236.1.11")
|
29
|
+
assert bot, msg: "Pinterest detected from User-Agent"
|
30
|
+
assert bot.valid?, msg: "Valid Pinterest"
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_engine_name
|
34
|
+
bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "54.236.1.11")
|
35
|
+
assert_equal "Pinterest", bot.detected_as
|
36
|
+
end
|
37
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-05-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: irrc
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: concurrent-ruby
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: rake
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -95,6 +81,7 @@ files:
|
|
95
81
|
- Rakefile
|
96
82
|
- legitbot.gemspec
|
97
83
|
- lib/legitbot.rb
|
84
|
+
- lib/legitbot/apple.rb
|
98
85
|
- lib/legitbot/baidu.rb
|
99
86
|
- lib/legitbot/bing.rb
|
100
87
|
- lib/legitbot/botmatch.rb
|
@@ -102,12 +89,15 @@ files:
|
|
102
89
|
- lib/legitbot/facebook.rb
|
103
90
|
- lib/legitbot/google.rb
|
104
91
|
- lib/legitbot/legitbot.rb
|
92
|
+
- lib/legitbot/pinterest.rb
|
105
93
|
- lib/legitbot/version.rb
|
106
94
|
- lib/legitbot/yandex.rb
|
95
|
+
- test/apple_test.rb
|
107
96
|
- test/botmatch_test.rb
|
108
97
|
- test/facebook_test.rb
|
109
98
|
- test/google_test.rb
|
110
99
|
- test/legitbot_test.rb
|
100
|
+
- test/pinterest_test.rb
|
111
101
|
homepage: https://github.com/alaz/legitbot
|
112
102
|
licenses:
|
113
103
|
- Apache-2.0
|
@@ -129,12 +119,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
119
|
version: '0'
|
130
120
|
requirements: []
|
131
121
|
rubyforge_project:
|
132
|
-
rubygems_version: 2.5.2
|
122
|
+
rubygems_version: 2.5.2.3
|
133
123
|
signing_key:
|
134
124
|
specification_version: 4
|
135
125
|
summary: Validate Web request was made by legitimate search engine
|
136
126
|
test_files:
|
127
|
+
- test/legitbot_test.rb
|
128
|
+
- test/pinterest_test.rb
|
129
|
+
- test/apple_test.rb
|
130
|
+
- test/google_test.rb
|
137
131
|
- test/botmatch_test.rb
|
138
132
|
- test/facebook_test.rb
|
139
|
-
- test/google_test.rb
|
140
|
-
- test/legitbot_test.rb
|