legitbot 0.3.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d775b4e4e434615989b8ef35aca9ac6498b06742ca6e8534f9bb4d6f3d5c9b04
4
- data.tar.gz: 239069018d1b043ec99bce8496b3760fc189611f6aa83545835d5f71ce7f94c5
3
+ metadata.gz: 34d1432c7c405d783b22a46851db88ccdea9f303defeccdd1cf98604bbb6ce09
4
+ data.tar.gz: a66b586f4b2dca67fb875ea37add6e7d89a7ce5d0705c3d1898d96ecf091036e
5
5
  SHA512:
6
- metadata.gz: '08e08f94cbb4979566982d91a525f7d03618c9c10e85613bcebfd51f8f8f8b0dab669ba9cd1b67c7a6d6a36c23d23ad88b64996bf252bd884082045ae590d38c'
7
- data.tar.gz: 97c4dd80bf62205ea4a03a0bc7570d57520611b4015f3bf9f66b450d78878a3fde1c9097e0ac79691e40d5f29e9ba676aaa3af70f71650adb61c7f4a0ee1ddf0
6
+ metadata.gz: cad1db2571e939020f74e871365c4748dd78ff18eb8ad9f005ea5bf5b0707835e296afa2601fc6309c994f69b7903d21da788fc219f5d712ee75e1ae9885fb7b
7
+ data.tar.gz: c72af598d60c55aff35a1b5e244dcde160a67589c588d68b9482d5e5c5f0590441c92505ec94beb462ff70a730ec9aabe80877a2d9db2f72c566c3a9c0b19059
@@ -0,0 +1,60 @@
1
+ name: build
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+
9
+ strategy:
10
+ fail-fast: false
11
+ matrix:
12
+ ruby: [ jruby, 2.6 ]
13
+
14
+ steps:
15
+ - uses: actions/checkout@v2
16
+ - name: Set up Ruby
17
+ uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: ${{ matrix.ruby }}
20
+ - name: Cache dependencies
21
+ uses: actions/cache@v1
22
+ with:
23
+ path: vendor/bundle
24
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
25
+ restore-keys: |
26
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
27
+ - name: Install dependencies
28
+ run: |
29
+ bundle config path vendor/bundle
30
+ bundle install --jobs 4 --retry 3
31
+ - name: Run tests
32
+ run: bundle exec rake test
33
+
34
+ lint:
35
+ needs: test
36
+ runs-on: ubuntu-latest
37
+
38
+ strategy:
39
+ matrix:
40
+ ruby: [ 2.6 ]
41
+
42
+ steps:
43
+ - uses: actions/checkout@v2
44
+ - name: Set up Ruby
45
+ uses: ruby/setup-ruby@v1
46
+ with:
47
+ ruby-version: ${{ matrix.ruby }}
48
+ - name: Cache dependencies
49
+ uses: actions/cache@v1
50
+ with:
51
+ path: vendor/bundle
52
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
53
+ restore-keys: |
54
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
55
+ - name: Install dependencies
56
+ run: |
57
+ bundle config path vendor/bundle
58
+ bundle install --jobs 4 --retry 3
59
+ - name: Run linter
60
+ run: bundle exec rubocop
data/.gitignore CHANGED
@@ -4,3 +4,4 @@ Gemfile.lock
4
4
  *.gemfile.lock
5
5
  /pkg
6
6
  /tags
7
+ /vendor
@@ -0,0 +1,2 @@
1
+ AllCops:
2
+ CacheRootDirectory: 'vendor'
@@ -0,0 +1 @@
1
+ 2.4
data/Gemfile CHANGED
@@ -1,2 +1,4 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
  gemspec
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Legitbot [![Build Status](https://secure.travis-ci.org/alaz/legitbot.png?branch=master)](http://travis-ci.org/alaz/legitbot) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
1
+ # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
2
2
 
3
3
  Ruby gem to check that an IP belongs to a bot, typically a search
4
4
  engine. This can be of help in protecting a web site from fake search
@@ -50,7 +50,9 @@ end
50
50
  * [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
51
51
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
52
52
  * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
53
+ * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
53
54
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
55
+ * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
54
56
  * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
55
57
 
56
58
  ## License
data/Rakefile CHANGED
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'bundler'
3
5
  require 'bump/tasks'
4
- require "rake/testtask"
6
+ require 'rake/testtask'
5
7
  Bundler::GemHelper.install_tasks
6
8
 
7
9
  Bump.tag_by_default = true
8
10
 
9
11
  Rake::TestTask.new do |t|
10
- t.libs << "test"
11
- t.test_files = FileList['test/*_test.rb']
12
+ t.libs << 'test'
13
+ t.test_files = FileList['test/**/*_test.rb']
12
14
  t.warning = true
13
15
  t.verbose = true
14
16
  end
@@ -1,27 +1,28 @@
1
- # encoding: utf-8
2
- $LOAD_PATH.push File.expand_path("../lib", __FILE__)
3
- require "legitbot/version"
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.push File.expand_path('lib', __dir__)
4
+ require 'legitbot/version'
4
5
 
5
6
  Gem::Specification.new do |spec|
6
7
  spec.name = 'legitbot'
7
8
  spec.version = Legitbot::VERSION
8
9
  spec.license = 'Apache-2.0'
9
10
 
10
- spec.author = "Alexander Azarov"
11
- spec.email = "self@alaz.me"
12
- spec.homepage = "https://github.com/alaz/legitbot"
13
- spec.summary = %q{Validate requests from Web crawlers: impersonating or not?}
14
- spec.description = "A library to make sure a Web request has been "\
15
- "made by a real search engine, not a malicious agent"
11
+ spec.author = 'Alexander Azarov'
12
+ spec.email = 'self@alaz.me'
13
+ spec.homepage = 'https://github.com/alaz/legitbot'
14
+ spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
15
+ spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
16
16
 
17
- spec.required_ruby_version = '>= 2.3.0'
18
- spec.add_dependency "irrc", ">= 0.2.1"
19
- spec.add_dependency "augmented_interval_tree", ">= 0.1.1"
20
- spec.add_development_dependency "bump"
21
- spec.add_development_dependency "rake"
22
- spec.add_development_dependency "minitest"
17
+ spec.required_ruby_version = '>= 2.4.0'
18
+ spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
19
+ spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
20
+ spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
+ spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
+ spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
23
+ spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
23
24
 
24
- spec.files = `git ls-files`.split($/)
25
- spec.rdoc_options = ["--charset=UTF-8"]
26
- spec.test_files = Dir.glob("test/**/*")
25
+ spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
+ spec.rdoc_options = ['--charset=UTF-8']
27
+ spec.test_files = Dir.glob('test/**/*')
27
28
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'legitbot/legitbot'
2
4
  require_relative 'legitbot/botmatch'
3
5
 
@@ -8,5 +10,7 @@ require_relative 'legitbot/bing'
8
10
  require_relative 'legitbot/duckduckgo'
9
11
  require_relative 'legitbot/facebook'
10
12
  require_relative 'legitbot/google'
13
+ require_relative 'legitbot/oracle'
11
14
  require_relative 'legitbot/pinterest'
15
+ require_relative 'legitbot/twitter'
12
16
  require_relative 'legitbot/yandex'
@@ -1,13 +1,18 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://ahrefs.com/robot
3
5
  class Ahrefs < BotMatch
4
- Ranges = %w(54.36.148.0/24 54.36.149.0/24 54.36.150.0/24 195.154.122.0/24 195.154.123.0/24 195.154.126.0/24 195.154.127.0/24)
5
-
6
- def valid?
7
- ip = IPAddr.new @ip
8
- Ranges.any? { |range| IPAddr.new(range).include? ip }
9
- end
6
+ ip_ranges %w[
7
+ 54.36.148.0/24
8
+ 54.36.149.0/24
9
+ 54.36.150.0/24
10
+ 195.154.122.0/24
11
+ 195.154.123.0/24
12
+ 195.154.126.0/24
13
+ 195.154.127.0/24
14
+ ]
10
15
  end
11
16
 
12
- rule Legitbot::Ahrefs, %w(AhrefsBot)
17
+ rule Legitbot::Ahrefs, %w[AhrefsBot]
13
18
  end
@@ -1,20 +1,20 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'ipaddr'
2
4
 
3
- module Legitbot
5
+ module Legitbot # :nodoc:
4
6
  # https://support.apple.com/en-us/HT204683
5
-
6
7
  class Apple < BotMatch
7
- Range = IPAddr.new('17.0.0.0/8')
8
-
9
- def valid?
10
- ip = IPAddr.new @ip
11
- Range.include? ip
12
- end
8
+ ip_ranges '17.0.0.0/8'
13
9
  end
14
10
 
15
- class Apple_as_Google < Apple
11
+ # https://support.apple.com/en-us/HT204683
12
+ # rubocop:disable Naming/ClassAndModuleCamelCase
13
+ class Apple_as_Google < BotMatch
14
+ ip_ranges '17.0.0.0/8'
16
15
  end
16
+ # rubocop:enable Naming/ClassAndModuleCamelCase
17
17
 
18
- rule Legitbot::Apple, %w(Applebot)
19
- rule Legitbot::Apple_as_Google, %w(Googlebot)
18
+ rule Legitbot::Apple, %w[Applebot]
19
+ rule Legitbot::Apple_as_Google, %w[Googlebot]
20
20
  end
@@ -1,12 +1,10 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # http://help.baidu.com/question?prod_en=master&class=498&id=1000973
3
5
  class Baidu < BotMatch
4
- ValidDomains = ["baidu.com.", "baidu.jp."]
5
-
6
- def valid?
7
- subdomain_of?(*Baidu::ValidDomains)
8
- end
6
+ domains 'baidu.com.', 'baidu.jp.', reverse: false
9
7
  end
10
8
 
11
- rule Legitbot::Baidu, %w(Baiduspider)
9
+ rule Legitbot::Baidu, %w[Baiduspider]
12
10
  end
@@ -1,12 +1,10 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/
3
5
  class Bing < BotMatch
4
- ValidDomains = ["search.msn.com."]
5
-
6
- def valid?
7
- subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
8
- end
6
+ domains 'search.msn.com.'
9
7
  end
10
8
 
11
- rule Legitbot::Bing, %w(Bingbot bingbot)
9
+ rule Legitbot::Bing, %w[Bingbot bingbot]
12
10
  end
@@ -1,5 +1,8 @@
1
- require 'resolv'
2
- require 'ipaddr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'config/resolver'
4
+ require_relative 'validators/domains'
5
+ require_relative 'validators/ip_ranges'
3
6
 
4
7
  module Legitbot
5
8
  ##
@@ -7,61 +10,31 @@ module Legitbot
7
10
  # +valid?+, +fake?+ and +detected_as+
8
11
  #
9
12
  class BotMatch
10
- def initialize(ip, resolver_config = nil)
11
- @dns = Resolv::DNS.new(resolver_config)
12
- @ip = ip
13
- end
14
-
15
- ##
16
- # Returns a Resolv::DNS::Name instance with
17
- # the reverse name
18
- def reverse_domain
19
- @reverse_domain ||= @dns.getname(@ip)
20
- rescue Resolv::ResolvError
21
- @reverse_domain ||= nil
22
- end
23
-
24
- ##
25
- # Returns a String with the reverse name
26
- def reverse_name
27
- reverse_domain&.to_s
28
- end
29
-
30
- ##
31
- # Returns a String with IP created from the reverse name
32
- def reversed_ip
33
- return nil if reverse_name.nil?
13
+ include Legitbot::Validators::IpRanges
14
+ include Legitbot::Validators::Domains
34
15
 
35
- @reverse_ip ||= @dns.getaddress(reverse_name)
36
- @reverse_ip.to_s
37
- end
38
-
39
- def reverse_resolves?
40
- @ip == reversed_ip
41
- end
42
-
43
- def subdomain_of?(*domains)
44
- return false if reverse_name.nil?
45
-
46
- domains.any? { |d|
47
- reverse_domain.subdomain_of? Resolv::DNS::Name.create(d)
48
- }
16
+ def initialize(ip)
17
+ @ip = ip
49
18
  end
50
19
 
51
20
  def detected_as
52
21
  self.class.name.split('::').last.downcase.to_sym
53
22
  end
54
23
 
24
+ def valid?
25
+ valid_ip? && valid_domain?
26
+ end
27
+
55
28
  def fake?
56
29
  !valid?
57
30
  end
58
31
 
59
- def self.valid?(ip, resolver_config = nil)
60
- self.new(ip, resolver_config).valid?
32
+ def self.valid?(ip)
33
+ new(ip).valid?
61
34
  end
62
35
 
63
- def self.fake?(ip, resolver_config = nil)
64
- self.new(ip, resolver_config).fake?
36
+ def self.fake?(ip)
37
+ new(ip).fake?
65
38
  end
66
39
  end
67
40
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'resolv'
4
+
5
+ module Legitbot
6
+ module Config
7
+ module Resolver # :nodoc:
8
+ def resolver_config(options = nil)
9
+ @resolver_config = options
10
+ end
11
+
12
+ def resolver
13
+ @resolver_config ||= Legitbot.resolver_config
14
+ @resolver ||= Resolv::DNS.new @resolver_config
15
+ end
16
+ end
17
+ end
18
+ end
@@ -1,12 +1,23 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://duckduckgo.com/duckduckbot
3
5
  class DuckDuckGo < BotMatch
4
- ValidIPs = %w(50.16.241.113 50.16.241.114 50.16.241.117 50.16.247.234 52.204.97.54 52.5.190.19 54.197.234.188 54.208.100.253 23.21.227.69)
5
-
6
- def valid?
7
- DuckDuckGo::ValidIPs.include? @ip
8
- end
6
+ ip_ranges %w[
7
+ 23.21.227.69
8
+ 40.88.21.235
9
+ 50.16.241.113
10
+ 50.16.241.114
11
+ 50.16.241.117
12
+ 50.16.247.234
13
+ 52.204.97.54
14
+ 52.5.190.19
15
+ 54.197.234.188
16
+ 54.208.100.253
17
+ 54.208.102.37
18
+ 107.21.1.8
19
+ ]
9
20
  end
10
21
 
11
- rule Legitbot::DuckDuckGo, %w(DuckDuckBot)
22
+ rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
12
23
  end
@@ -1,48 +1,22 @@
1
- require 'ipaddr'
1
+ # frozen_string_literal: true
2
+
2
3
  require 'irrc'
3
- require 'interval_tree'
4
4
 
5
- module Legitbot
5
+ module Legitbot # :nodoc:
6
6
  # https://developers.facebook.com/docs/sharing/webmasters/crawler
7
-
8
7
  class Facebook < BotMatch
9
8
  AS = 'AS32934'
10
9
 
11
- def valid?
12
- ip = IPAddr.new(@ip)
13
- Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0
14
- end
15
-
16
- @mutex = Mutex.new
17
-
18
- def self.valid_ips
19
- @mutex.synchronize { @ips ||= load_ips }
20
- end
21
-
22
- def self.reload!
23
- @mutex.synchronize { @ips = load_ips }
24
- end
25
-
26
- def self.load_ips
27
- whois.map do |(family, records)|
28
- ranges = records.map do |cidr|
29
- range = IPAddr.new(cidr).to_range
30
- (range.begin.to_i..range.end.to_i)
31
- end
32
- [family, IntervalTree::Tree.new(ranges)]
33
- end.to_h
34
- end
35
-
36
- def self.whois
10
+ ip_ranges do
37
11
  client = Irrc::Client.new
38
12
  client.query :radb, AS
39
13
  results = client.perform
40
14
 
41
- %i(ipv4 ipv6).map do |family|
42
- [family, results[AS][family][AS]]
43
- end.to_h
15
+ %i[ipv4 ipv6].map do |family|
16
+ results[AS][family][AS]
17
+ end.flatten
44
18
  end
45
19
  end
46
20
 
47
- rule Legitbot::Facebook, %w(Facebot facebookexternalhit/1.1)
21
+ rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
48
22
  end