legitbot 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f26aea921fa62377a7a180c49ac426cc42168c98
4
- data.tar.gz: fb2fa644e6cf65edf0f068417b63d0a650ceac5f
3
+ metadata.gz: 9624f762d4c29b00c2baf6f53ec6f6dacaec29d4
4
+ data.tar.gz: e6c2650c5471cb9075e9450fd8a5894f074ee05b
5
5
  SHA512:
6
- metadata.gz: 4c909f01468f9a4ea1a8f8cee045d03f1d0736f1868d43172f639379bb05a0b7ba1092ce678465a26495df282fe98b0f0a2542903390b5782b94230a62dff20d
7
- data.tar.gz: a8b36a573cda2eaba60cca50cc6a8a28356278257e634b8392e0a3ff719627d73eab9835ae9e3b21a248cd465362c904037c0c0f286533fbd2dabe68ca9532fb
6
+ metadata.gz: dfa1e4d4ec277f2785e49f5af55a630aed08271fa4c7dd24d93d2155d8da8f9522947169a392a2111d6519ab74a0fa3d6410f184f7f67e067403c421ab56ecc7
7
+ data.tar.gz: f3e765ee7215424adc1a91e990734bc8409634e321a0c2c37dab83e426c438be7134f432cf0424097489d7ce2397fd4a56711da3349f84cbef83ef26aace1f22
data/.gitignore CHANGED
@@ -2,3 +2,4 @@ Gemfile.lock
2
2
  .bundle
3
3
  *.gem
4
4
  *.gemfile.lock
5
+ /tags
data/README.md CHANGED
@@ -33,19 +33,21 @@ end
33
33
 
34
34
  ## Supported
35
35
 
36
- * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
37
- * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
38
- * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
36
+ * [Applebot](https://support.apple.com/en-us/HT204683)
39
37
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
38
+ * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
40
39
  * [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
41
40
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
41
+ * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
42
+ * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
43
+ * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
42
44
 
43
45
  ## Issues, problems, plans
44
46
 
45
47
  * Rails middleware
46
48
  * More testing for Facebook
47
- * Review for thread safety
48
49
  * Make it possible to reload Facebook IP ranges
50
+ * Bots masquerading as someone else, e.g. `Telegram (like Twitter)` - what to do?
49
51
 
50
52
  ## License
51
53
 
@@ -17,7 +17,6 @@ Gem::Specification.new do |spec|
17
17
  spec.required_ruby_version = '>= 2.0.0'
18
18
  spec.add_dependency "irrc"
19
19
  spec.add_dependency "segment_tree"
20
- spec.add_dependency "concurrent-ruby"
21
20
  spec.add_development_dependency "rake"
22
21
  spec.add_development_dependency "minitest"
23
22
 
@@ -1,11 +1,11 @@
1
- require 'resolv'
2
-
3
1
  require_relative 'legitbot/legitbot'
4
2
  require_relative 'legitbot/botmatch'
5
3
 
4
+ require_relative 'legitbot/apple'
6
5
  require_relative 'legitbot/baidu'
7
6
  require_relative 'legitbot/bing'
8
7
  require_relative 'legitbot/duckduckgo'
9
8
  require_relative 'legitbot/facebook'
10
9
  require_relative 'legitbot/google'
10
+ require_relative 'legitbot/pinterest'
11
11
  require_relative 'legitbot/yandex'
@@ -0,0 +1,16 @@
1
+ require 'ipaddr'
2
+
3
+ module Legitbot
4
+ # https://support.apple.com/en-us/HT204683
5
+
6
+ class Apple < BotMatch
7
+ Range = IPAddr.new('17.0.0.0/8')
8
+
9
+ def valid?
10
+ ip = IPAddr.new @ip
11
+ Range.include? ip
12
+ end
13
+ end
14
+
15
+ rule Legitbot::Apple, %w(Applebot)
16
+ end
@@ -1,3 +1,6 @@
1
+ require 'resolv'
2
+ require 'ipaddr'
3
+
1
4
  module Legitbot
2
5
  ##
3
6
  # Represents a bot instance match. Typical methods are
@@ -1,13 +1,16 @@
1
1
  require 'segment_tree'
2
2
  require 'irrc'
3
- require 'concurrent'
3
+ require 'monitor'
4
+ require 'ipaddr'
4
5
 
5
6
  module Legitbot
6
7
  # https://developers.facebook.com/docs/sharing/webmasters/crawler
7
8
 
8
9
  class Facebook < BotMatch
10
+ lock = Monitor.new
11
+
9
12
  AS = 'AS32934'
10
- ValidIPs = Concurrent::Delay.new do
13
+ ValidIPs = lock.synchronize do
11
14
  client = Irrc::Client.new
12
15
  client.query :radb, 'AS32934'
13
16
  results = client.perform
@@ -21,11 +24,7 @@ module Legitbot
21
24
 
22
25
  def valid?
23
26
  ip = IPAddr.new(@ip)
24
- if ip.ipv4?
25
- ValidIPs.value[:ipv4].find(ip)
26
- else
27
- ValidIPs.value[:ipv6].find(ip)
28
- end
27
+ ValidIPs[ip.ipv4? ? :ipv4 : :ipv6].find(ip)
29
28
  end
30
29
  end
31
30
 
@@ -1,5 +1,3 @@
1
- require 'resolv'
2
-
3
1
  module Legitbot
4
2
  # https://support.google.com/webmasters/answer/1061943
5
3
  # https://support.google.com/webmasters/answer/80553
@@ -0,0 +1,13 @@
1
+ module Legitbot
2
+ # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
3
+
4
+ class Pinterest < BotMatch
5
+ ValidDomains = ["pinterest.com."]
6
+
7
+ def valid?
8
+ subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
9
+ end
10
+ end
11
+
12
+ rule Legitbot::Pinterest, %w(Pinterestbot Pinterest)
13
+ end
@@ -1,3 +1,3 @@
1
1
  module Legitbot
2
- VERSION = '0.1.0'
2
+ VERSION = '0.1.2'
3
3
  end
@@ -0,0 +1,22 @@
1
+ require 'minitest/autorun'
2
+ require 'legitbot'
3
+
4
+ class AppleTest < Minitest::Test
5
+ def test_valid_ip
6
+ ip = "17.58.98.60"
7
+ match = Legitbot::Apple.new(ip)
8
+ assert match.valid?, msg: "#{ip} is a valid Applebot IP"
9
+ end
10
+
11
+ def test_invalid_ip
12
+ ip = "127.0.0.1"
13
+ match = Legitbot::Apple.new(ip)
14
+ assert match.fake?, msg: "#{ip} is a fake Applebot IP"
15
+ end
16
+
17
+ def test_user_agent
18
+ bot = Legitbot.bot("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)", "17.58.98.60")
19
+ assert_equal "Apple", bot.detected_as
20
+ assert bot.valid?, msg: "A valid Applebot User-agent and IP"
21
+ end
22
+ end
@@ -3,22 +3,22 @@ require 'legitbot'
3
3
 
4
4
  class BotMatchTest < Minitest::Test
5
5
  def test_reverse_name
6
- match = Legitbot::BotMatch.new "66.249.78.6"
7
- assert_equal "crawl-66-249-78-6.googlebot.com", match.reverse_name
6
+ match = Legitbot::BotMatch.new "66.249.64.141"
7
+ assert_equal "crawl-66-249-64-141.googlebot.com", match.reverse_name
8
8
  end
9
9
 
10
10
  def test_reverse_ip
11
- match = Legitbot::BotMatch.new "66.249.78.6"
12
- assert_equal "66.249.78.6", match.reversed_ip
11
+ match = Legitbot::BotMatch.new "66.249.64.141"
12
+ assert_equal "66.249.64.141", match.reversed_ip
13
13
  end
14
14
 
15
15
  def test_reverse_resolves
16
- match = Legitbot::BotMatch.new "66.249.78.6"
16
+ match = Legitbot::BotMatch.new "66.249.64.141"
17
17
  assert_equal true, match.reverse_resolves?
18
18
  end
19
19
 
20
20
  def test_valid_class_syntax
21
- assert Legitbot::Google.valid?("66.249.78.6"), msg: "Valid Googlebot"
21
+ assert Legitbot::Google.valid?("66.249.64.141"), msg: "Valid Googlebot"
22
22
  assert Legitbot::Google.fake?("149.210.164.47"), msg: "Fake Googlebot"
23
23
  end
24
24
  end
@@ -11,7 +11,7 @@ class GoogleTest < Minitest::Test
11
11
  end
12
12
 
13
13
  def test_valid_ip
14
- ip = "66.249.78.6"
14
+ ip = "66.249.64.141"
15
15
  match = Legitbot::Google.new ip
16
16
  reverse_name = match.reverse_name
17
17
  assert match.subdomain_of?("googlebot.com."), msg: "#{reverse_name} is a subdomain of googlebot.com"
@@ -25,13 +25,13 @@ class GoogleTest < Minitest::Test
25
25
  end
26
26
 
27
27
  def test_valid_ua
28
- bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.78.6")
28
+ bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.64.141")
29
29
  assert bot, msg: "Googlebot detected from User-Agent"
30
30
  assert bot.valid?, msg: "Valid Googlebot"
31
31
  end
32
32
 
33
33
  def test_engine_name
34
- bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.78.6")
34
+ bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.64.141")
35
35
  assert_equal "Google", bot.detected_as
36
36
  end
37
37
  end
@@ -0,0 +1,37 @@
1
+ require 'minitest/autorun'
2
+ require 'legitbot'
3
+
4
+ class PinterestTest < Minitest::Test
5
+ def test_malicious_ip
6
+ ip = "149.210.164.47"
7
+ match = Legitbot::Pinterest.new ip
8
+ reverse_name = match.reverse_name
9
+ assert !match.subdomain_of?("pinterest.com."), msg: "#{reverse_name} is not a subdomain of pinterest.com"
10
+ assert !match.valid?, msg: "#{ip} is not a real Pinterest IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = "54.236.1.11"
15
+ match = Legitbot::Pinterest.new ip
16
+ reverse_name = match.reverse_name
17
+ assert match.subdomain_of?("pinterest.com."), msg: "#{reverse_name} is a subdomain of pinterest.com"
18
+ assert match.valid?, msg: "#{ip} is a valid Pinterest IP"
19
+ end
20
+
21
+ def test_malicious_ua
22
+ bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "149.210.164.47")
23
+ assert bot, msg: "Pinterest detected from User-Agent"
24
+ assert !bot.valid?, msg: "Not a valid Pinterest"
25
+ end
26
+
27
+ def test_valid_ua
28
+ bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "54.236.1.11")
29
+ assert bot, msg: "Pinterest detected from User-Agent"
30
+ assert bot.valid?, msg: "Valid Pinterest"
31
+ end
32
+
33
+ def test_engine_name
34
+ bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "54.236.1.11")
35
+ assert_equal "Pinterest", bot.detected_as
36
+ end
37
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-21 00:00:00.000000000 Z
11
+ date: 2018-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: irrc
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: concurrent-ruby
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: rake
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -95,6 +81,7 @@ files:
95
81
  - Rakefile
96
82
  - legitbot.gemspec
97
83
  - lib/legitbot.rb
84
+ - lib/legitbot/apple.rb
98
85
  - lib/legitbot/baidu.rb
99
86
  - lib/legitbot/bing.rb
100
87
  - lib/legitbot/botmatch.rb
@@ -102,12 +89,15 @@ files:
102
89
  - lib/legitbot/facebook.rb
103
90
  - lib/legitbot/google.rb
104
91
  - lib/legitbot/legitbot.rb
92
+ - lib/legitbot/pinterest.rb
105
93
  - lib/legitbot/version.rb
106
94
  - lib/legitbot/yandex.rb
95
+ - test/apple_test.rb
107
96
  - test/botmatch_test.rb
108
97
  - test/facebook_test.rb
109
98
  - test/google_test.rb
110
99
  - test/legitbot_test.rb
100
+ - test/pinterest_test.rb
111
101
  homepage: https://github.com/alaz/legitbot
112
102
  licenses:
113
103
  - Apache-2.0
@@ -129,12 +119,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
129
119
  version: '0'
130
120
  requirements: []
131
121
  rubyforge_project:
132
- rubygems_version: 2.5.2
122
+ rubygems_version: 2.5.2.3
133
123
  signing_key:
134
124
  specification_version: 4
135
125
  summary: Validate Web request was made by legitimate search engine
136
126
  test_files:
127
+ - test/legitbot_test.rb
128
+ - test/pinterest_test.rb
129
+ - test/apple_test.rb
130
+ - test/google_test.rb
137
131
  - test/botmatch_test.rb
138
132
  - test/facebook_test.rb
139
- - test/google_test.rb
140
- - test/legitbot_test.rb