legitbot 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 394b73c86f4a4aefe5cd96a6483ec5f000aaed04
4
- data.tar.gz: 7b0fbc498167e635487b4e659dcd88aa95a7360a
3
+ metadata.gz: f26aea921fa62377a7a180c49ac426cc42168c98
4
+ data.tar.gz: fb2fa644e6cf65edf0f068417b63d0a650ceac5f
5
5
  SHA512:
6
- metadata.gz: 82e4a3b94efee99fee6a0cacea416c9596e9d383def2ea5191211ce087f3cf46b5240a87c43d7859a90560eca04c07c3eedebe9b4efb389d9697e28f364f749c
7
- data.tar.gz: bd7ff484dc01003c95b6dbd399f4c795480a7d1a194d6c09e98b4d03bc51b26790f6c4b6190ae06c89e0afcc71c50d293b5fb9a8a3a5f078c4d7d87bb70bcc76
6
+ metadata.gz: 4c909f01468f9a4ea1a8f8cee045d03f1d0736f1868d43172f639379bb05a0b7ba1092ce678465a26495df282fe98b0f0a2542903390b5782b94230a62dff20d
7
+ data.tar.gz: a8b36a573cda2eaba60cca50cc6a8a28356278257e634b8392e0a3ff719627d73eab9835ae9e3b21a248cd465362c904037c0c0f286533fbd2dabe68ca9532fb
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Legitbot
1
+ # Legitbot [![Build Status](https://secure.travis-ci.org/alaz/legitbot.png?branch=master)](http://travis-ci.org/alaz/legitbot) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
2
2
 
3
3
  Ruby gem to check if an IP really belongs to some bot, typically a search
4
4
  engine. This can be of much help if one wants to protect his/her web site from
@@ -31,10 +31,21 @@ Rack::Attack.blocklist("fake Googlebot") do |req|
31
31
  end
32
32
  ```
33
33
 
34
+ ## Supported
35
+
36
+ * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
37
+ * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
38
+ * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
39
+ * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
40
+ * [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
41
+ * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
42
+
34
43
  ## Issues, problems, plans
35
44
 
36
45
  * Rails middleware
37
- * Facebook: https://developers.facebook.com/docs/sharing/webmasters/crawler
46
+ * More testing for Facebook
47
+ * Review for thread safety
48
+ * Make it possible to reload Facebook IP ranges
38
49
 
39
50
  ## License
40
51
 
data/Rakefile CHANGED
@@ -12,4 +12,11 @@ Rake::TestTask.new do |t|
12
12
  t.verbose = true
13
13
  end
14
14
 
15
+ desc 'Start a console'
16
+ task :console do
17
+ require 'irb'
18
+ ARGV.clear
19
+ IRB.start
20
+ end
21
+
15
22
  task default: %w[test]
data/legitbot.gemspec CHANGED
@@ -15,6 +15,9 @@ Gem::Specification.new do |spec|
15
15
  "made by a real search engine, not a fake"
16
16
 
17
17
  spec.required_ruby_version = '>= 2.0.0'
18
+ spec.add_dependency "irrc"
19
+ spec.add_dependency "segment_tree"
20
+ spec.add_dependency "concurrent-ruby"
18
21
  spec.add_development_dependency "rake"
19
22
  spec.add_development_dependency "minitest"
20
23
 
@@ -3,10 +3,6 @@ module Legitbot
3
3
  class Baidu < BotMatch
4
4
  ValidDomains = ["baidu.com.", "baidu.jp."]
5
5
 
6
- def initialize(ip, resolver_config = nil)
7
- super(ip, resolver_config)
8
- end
9
-
10
6
  def valid?
11
7
  subdomain_of?(*Baidu::ValidDomains)
12
8
  end
data/lib/legitbot/bing.rb CHANGED
@@ -3,10 +3,6 @@ module Legitbot
3
3
  class Bing < BotMatch
4
4
  ValidDomains = ["search.msn.com."]
5
5
 
6
- def initialize(ip, resolver_config = nil)
7
- super(ip, resolver_config)
8
- end
9
-
10
6
  def valid?
11
7
  subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
12
8
  end
@@ -3,10 +3,6 @@ module Legitbot
3
3
  class DuckDuckGo < BotMatch
4
4
  ValidIPs = %w(72.94.249.34 72.94.249.35 72.94.249.36 72.94.249.37 72.94.249.38)
5
5
 
6
- def initialize(ip, resolver_config = nil)
7
- super(ip, resolver_config)
8
- end
9
-
10
6
  def valid?
11
7
  DuckDuckGo::ValidIPs.include? @ip
12
8
  end
@@ -0,0 +1,33 @@
1
+ require 'segment_tree'
2
+ require 'irrc'
3
+ require 'concurrent'
4
+
5
+ module Legitbot
6
+ # https://developers.facebook.com/docs/sharing/webmasters/crawler
7
+
8
+ class Facebook < BotMatch
9
+ AS = 'AS32934'
10
+ ValidIPs = Concurrent::Delay.new do
11
+ client = Irrc::Client.new
12
+ client.query :radb, 'AS32934'
13
+ results = client.perform
14
+
15
+ Hash[%i(ipv4 ipv6).map { |k|
16
+ [k, SegmentTree.new(results[AS][k][AS].map { |cidr|
17
+ [IPAddr.new(cidr).to_range, true]
18
+ })]
19
+ }]
20
+ end
21
+
22
+ def valid?
23
+ ip = IPAddr.new(@ip)
24
+ if ip.ipv4?
25
+ ValidIPs.value[:ipv4].find(ip)
26
+ else
27
+ ValidIPs.value[:ipv6].find(ip)
28
+ end
29
+ end
30
+ end
31
+
32
+ rule Legitbot::Facebook, %w(facebookhit facebookexternalhit)
33
+ end
@@ -7,10 +7,6 @@ module Legitbot
7
7
  class Google < BotMatch
8
8
  ValidDomains = ["google.com.", "googlebot.com."]
9
9
 
10
- def initialize(ip, resolver_config = nil)
11
- super(ip, resolver_config)
12
- end
13
-
14
10
  def valid?
15
11
  subdomain_of?(*Google::ValidDomains) && reverse_resolves?
16
12
  end
@@ -1,3 +1,3 @@
1
1
  module Legitbot
2
- VERSION = '0.0.1'
2
+ VERSION = '0.1.0'
3
3
  end
@@ -4,10 +4,6 @@ module Legitbot
4
4
  class Yandex < BotMatch
5
5
  ValidDomains = ["yandex.ru.", "yandex.net.", "yandex.com."]
6
6
 
7
- def initialize(ip, resolver_config = nil)
8
- super(ip, resolver_config)
9
- end
10
-
11
7
  def valid?
12
8
  subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
13
9
  end
data/lib/legitbot.rb CHANGED
@@ -2,8 +2,10 @@ require 'resolv'
2
2
 
3
3
  require_relative 'legitbot/legitbot'
4
4
  require_relative 'legitbot/botmatch'
5
- require_relative 'legitbot/google'
6
- require_relative 'legitbot/yandex'
7
- require_relative 'legitbot/bing'
5
+
8
6
  require_relative 'legitbot/baidu'
7
+ require_relative 'legitbot/bing'
9
8
  require_relative 'legitbot/duckduckgo'
9
+ require_relative 'legitbot/facebook'
10
+ require_relative 'legitbot/google'
11
+ require_relative 'legitbot/yandex'
@@ -0,0 +1,22 @@
1
+ require 'minitest/autorun'
2
+ require 'legitbot'
3
+
4
+ class FacebookTest < Minitest::Test
5
+ def test_valid_ip
6
+ ip = "69.63.186.89"
7
+ match = Legitbot::Facebook.new(ip)
8
+ assert match.valid?, msg: "#{ip} is a valid Facebook IP"
9
+ end
10
+
11
+ def test_invalid_ip
12
+ ip = "127.0.0.1"
13
+ match = Legitbot::Facebook.new(ip)
14
+ assert match.fake?, msg: "#{ip} is a fake Facebook IP"
15
+ end
16
+
17
+ def test_user_agent
18
+ bot = Legitbot.bot("facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "31.13.76.56")
19
+ assert_equal "Facebook", bot.detected_as
20
+ assert bot.valid?, msg: "A valid Facebook User-agent and IP"
21
+ end
22
+ end
metadata CHANGED
@@ -1,15 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-20 00:00:00.000000000 Z
11
+ date: 2016-12-21 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: irrc
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: segment_tree
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: concurrent-ruby
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
13
55
  - !ruby/object:Gem::Dependency
14
56
  name: rake
15
57
  requirement: !ruby/object:Gem::Requirement
@@ -57,11 +99,13 @@ files:
57
99
  - lib/legitbot/bing.rb
58
100
  - lib/legitbot/botmatch.rb
59
101
  - lib/legitbot/duckduckgo.rb
102
+ - lib/legitbot/facebook.rb
60
103
  - lib/legitbot/google.rb
61
104
  - lib/legitbot/legitbot.rb
62
105
  - lib/legitbot/version.rb
63
106
  - lib/legitbot/yandex.rb
64
107
  - test/botmatch_test.rb
108
+ - test/facebook_test.rb
65
109
  - test/google_test.rb
66
110
  - test/legitbot_test.rb
67
111
  homepage: https://github.com/alaz/legitbot
@@ -91,5 +135,6 @@ specification_version: 4
91
135
  summary: Validate Web request was made by legitimate search engine
92
136
  test_files:
93
137
  - test/botmatch_test.rb
138
+ - test/facebook_test.rb
94
139
  - test/google_test.rb
95
140
  - test/legitbot_test.rb