legitbot 0.3.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,11 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://support.google.com/webmasters/answer/1061943
3
5
  # https://support.google.com/webmasters/answer/80553
4
-
5
6
  class Google < BotMatch
6
- ValidDomains = ["google.com.", "googlebot.com."]
7
-
8
- def valid?
9
- subdomain_of?(*Google::ValidDomains) && reverse_resolves?
10
- end
7
+ domains 'google.com.', 'googlebot.com.'
11
8
  end
12
9
 
13
- rule Legitbot::Google, %w(Googlebot Mediapartners-Google AdsBot-Google)
10
+ rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
14
11
  end
@@ -1,6 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Bot lookup based on user agent
1
5
  module Legitbot
2
6
  @rules = []
3
7
 
8
+ class << self
9
+ attr_accessor :resolver_config
10
+ end
11
+
4
12
  ##
5
13
  # Lookup a bot based on its signature from +User-Agent+ header.
6
14
  #
@@ -10,15 +18,12 @@ module Legitbot
10
18
  # otherwise.
11
19
  # :yields: a found bot
12
20
  #
13
- def self.bot(userAgent, ip, resolver_config = nil)
14
- bots =
15
- @rules.select { |rule|
16
- rule[:fragments].any? {|f| userAgent.index f}
17
- }.map { |rule|
18
- rule[:class].new(ip, resolver_config)
19
- }
21
+ def self.bot(user_agent, ip)
22
+ bots = @rules
23
+ .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
24
+ .map { |rule| rule[:class].new(ip) }
20
25
 
21
- selected = bots.select { |b| b.valid? }.first if bots.size > 1
26
+ selected = bots.select(&:valid?).first if bots.size > 1
22
27
  selected = bots.last if selected.nil?
23
28
 
24
29
  if selected && block_given?
@@ -29,6 +34,6 @@ module Legitbot
29
34
  end
30
35
 
31
36
  def self.rule(clazz, fragments)
32
- @rules << {:class => clazz, :fragments => fragments}
37
+ @rules << { class: clazz, fragments: fragments }
33
38
  end
34
39
  end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
module Legitbot # :nodoc:
  # Bot matcher registered for user agents containing +GrapeshotCrawler+.
  # Declares 148.64.56.0/24 as the matcher's only IP range.
  #
  # Reference:
  # https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html
  class Oracle < BotMatch
    ip_ranges %w[148.64.56.0/24]
  end

  rule(Oracle, %w[GrapeshotCrawler])
end
@@ -1,13 +1,10 @@
1
- module Legitbot
2
- # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
1
+ # frozen_string_literal: true
3
2
 
3
+ module Legitbot # :nodoc:
4
+ # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
4
5
  class Pinterest < BotMatch
5
- ValidDomains = ["pinterest.com."]
6
-
7
- def valid?
8
- subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
9
- end
6
+ domains 'pinterest.com.'
10
7
  end
11
8
 
12
- rule Legitbot::Pinterest, %w(Pinterestbot Pinterest/0.2)
9
+ rule Legitbot::Pinterest, %w[Pinterestbot Pinterest/0.2]
13
10
  end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Legitbot # :nodoc:
  # Bot matcher registered for user agents containing +Twitterbot+.
  # Declares two /22 networks as the matcher's IP ranges.
  #
  # References:
  # https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started
  # https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards
  class Twitter < BotMatch
    ip_ranges '199.16.156.0/22', '199.59.148.0/22'
  end

  rule(Twitter, %w[Twitterbot])
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'resolv'
4
+ require 'ipaddr'
5
+
6
module Legitbot
  module Validators
    #
    # Validates a bot's IP address through its reverse DNS (PTR) names.
    #
    # In a bot matcher:
    # `domains 'search.msn.com', ...`
    # `domains 'googlebot.com', reverse: false`
    #
    # `reverse` is true by default.
    module Domains
      class << self
        # Adds the class-level DSL (+domains+, +valid_domain?+, ...) to any
        # class that includes this module.
        def included(base)
          base.extend ClassMethods
        end
      end

      # Instance-level shortcut: checks this matcher's +@ip+ against the
      # domains declared on its class.
      def valid_domain?
        self.class.valid_domain?(@ip)
      end

      module ClassMethods # :nodoc:
        include Legitbot::Config::Resolver

        # Declares the domains a legitimate bot IP must reverse-resolve into.
        #
        # list::    domain names; nested arrays are flattened.
        # reverse:: when true (the default), the matching PTR name must also
        #           resolve forward to the very same IP.
        def domains(*list, reverse: true)
          @valid_domains = list.flatten.map { |d| Resolv::DNS::Name.create(d) }
          @validate_reverse_record = reverse
        end

        # True once +domains+ has been called on this class.
        def check_domains?
          instance_variable_defined?(:@valid_domains)
        end

        # Returns true when +ip+ passes the domain check.
        #
        # Classes that never called +domains+, or declared an empty list,
        # accept every IP. DNS lookup failures count as "not valid" rather
        # than raising.
        def valid_domain?(ip)
          return true unless check_domains?
          return true if @valid_domains.empty?

          domains = reverse_domains(ip)
          return false if domains.empty?

          record = find_subdomain_record(domains)
          return false unless record
          return true unless @validate_reverse_record

          # reverse_ip yields nil when the forward lookup fails, which can
          # never equal the IP under test.
          ip == reverse_ip(record)
        end

        # PTR names for +ip+ as reported by +resolver+.
        # Returns an empty list (never nil) when the lookup fails, so callers
        # can always treat the result as a collection.
        def reverse_domains(ip)
          resolver.getnames(ip)
        rescue Resolv::ResolvError
          []
        end

        # First reverse name that is a subdomain of one of the declared
        # domains, or nil when none qualifies.
        def find_subdomain_record(domains)
          domains.find do |d|
            @valid_domains.any? { |vd| d.subdomain_of?(vd) }
          end
        end

        # Forward-resolves +record+ and returns the address as a String.
        # Returns nil for a nil record or when the name does not resolve:
        # a PTR record can point at any name, including one that has no
        # forward record at all, and that must read as "fake", not crash.
        def reverse_ip(record)
          return nil if record.nil?

          resolver.getaddress(record.to_s).to_s
        rescue Resolv::ResolvError
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ipaddr'
4
+ require 'interval_tree'
5
+
6
module Legitbot
  module Validators
    #
    # Validates a bot's IP address against a list of addresses and CIDR
    # ranges.
    #
    # In a bot matcher:
    # `ip_ranges ip, range, ip, ...`
    # `ip_ranges do [ip, range, ...]; end`
    module IpRanges
      class << self
        # Adds the class-level DSL (+ip_ranges+, +valid_ip?+, ...) to any
        # class that includes this module.
        def included(base)
          base.extend ClassMethods
        end
      end

      # Instance-level shortcut: checks this matcher's +@ip+ against the
      # ranges declared on its class.
      def valid_ip?
        self.class.valid_ip?(@ip)
      end

      module ClassMethods # :nodoc:
        FAMILIES = %i[ipv4 ipv6].freeze
        EMPTY_GENERATOR = proc { [] }

        # Declares the allowed addresses.
        #
        # ips::   addresses / CIDR strings (nested arrays are flattened);
        #         indexed immediately when given.
        # block:: optional loader returning such a list. It is called by
        #         +valid_ips+ only while no ranges are stored yet, and by
        #         every +reload_ips+.
        def ip_ranges(*ips, &block)
          @ip_ranges = partition_ips(ips.flatten) unless ips.empty?
          @ip_ranges_loader = block || EMPTY_GENERATOR
          @ip_loader_mutex = Mutex.new
        end

        # True once +ip_ranges+ has been called on this class.
        def check_ranges?
          instance_variable_defined?(:@ip_ranges_loader)
        end

        # Returns true when +ip+ falls into one of the declared ranges.
        #
        # Classes that never called +ip_ranges+, or whose list is empty,
        # accept every IP.
        def valid_ip?(ip)
          return true unless check_ranges?

          # Take one snapshot so a concurrent reload cannot swap the index
          # between the emptiness check and the lookup.
          index = valid_ips
          return true if index.empty?

          addr = IPAddr.new(ip)
          matches = index[addr.ipv4? ? :ipv4 : :ipv6].search(addr.to_i)
          # The tree for an address family with no declared ranges answers
          # nil instead of an empty list (e.g. an IPv6 address checked
          # against IPv4-only ranges); treat that as "no match".
          !(matches.nil? || matches.empty?)
        end

        # The indexed ranges; the loader is invoked under the mutex the first
        # time they are needed.
        def valid_ips
          @ip_loader_mutex.synchronize do
            @ip_ranges ||= load_ips
          end
        end

        # Re-runs the loader and replaces the stored ranges with its result.
        def reload_ips
          @ip_loader_mutex.synchronize do
            @ip_ranges = load_ips
          end
        end

        # Calls the loader and indexes whatever it returns.
        def load_ips
          partition_ips(@ip_ranges_loader.call)
        end

        # Builds the lookup index: a hash with one interval tree per address
        # family (+:ipv4+, +:ipv6+), each holding the integer bounds of the
        # given addresses / CIDR blocks. An empty input yields an empty
        # array, which +valid_ip?+ reads as "nothing to check".
        # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        def partition_ips(ips)
          return [] if ips.empty?

          ips
            .map { |cidr| IPAddr.new(cidr) }
            .partition(&:ipv4?)
            .each_with_index
            .map do |list, index|
              ranges = list.map(&:to_range).map do |r|
                (r.begin.to_i..r.end.to_i)
              end
              [FAMILIES[index], IntervalTree::Tree.new(ranges)]
            end.to_h
        end
        # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
      end
    end
  end
end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Legitbot
2
- VERSION = '0.3.2'
4
+ VERSION = '1.0.0'
3
5
  end
@@ -1,17 +1,46 @@
1
- module Legitbot
2
- # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
1
+ # frozen_string_literal: true
3
2
 
3
+ module Legitbot # :nodoc:
4
+ # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
4
5
  class Yandex < BotMatch
5
- ValidDomains = ["yandex.ru.", "yandex.net.", "yandex.com."]
6
-
7
- def valid?
8
- subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
9
- end
6
+ domains 'yandex.ru.', 'yandex.net.', 'yandex.com.'
10
7
  end
11
8
 
12
- rule Legitbot::Yandex, %w(YandexBot YandexAccessibilityBot YandexMobileBot
13
- YandexDirectDyn YandexScreenshotBot YandexImages YandexVideo YandexVideoParser
14
- YandexMedia YandexBlogs YandexFavicons YandexWebmaster YandexPagechecker
15
- YandexImageResizer YaDirectFetcher YandexCalendar YandexSitelinks YandexMetrika
16
- YandexNews YandexVertis YandexSearchShop YandexVerticals)
9
+ rule Legitbot::Yandex, %w[
10
+ YandexAccessibilityBot
11
+ YandexAdNet
12
+ YandexBlogs
13
+ YandexBot/
14
+ YandexCalendar
15
+ YandexDirect/
16
+ YandexDirectDyn
17
+ YandexFavicons
18
+ YaDirectFetcher
19
+ YandexForDomain
20
+ YandexImages
21
+ YandexImageResizer
22
+ YandexMobileBot
23
+ YandexMarket
24
+ YandexMedia
25
+ YandexMetrika
26
+ YandexMobileScreenShotBot
27
+ YandexNews
28
+ YandexOntoDB
29
+ YandexOntoDBAPI
30
+ YandexPagechecker
31
+ YandexPartner
32
+ YandexRCA
33
+ YandexSearchShop
34
+ YandexSitelinks
35
+ YandexSpravBot
36
+ YandexTracker
37
+ YandexTurbo
38
+ YandexVertis
39
+ YandexVerticals
40
+ YandexVideo
41
+ YandexVideoParser
42
+ YandexWebmaster
43
+ YandexScreenshotBot
44
+ YandexMedianaBot
45
+ ]
17
46
  end
@@ -1,28 +1,36 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'minitest/autorun'
2
4
  require 'legitbot'
3
5
 
4
6
  class AhrefsTest < Minitest::Test
5
7
  def test_malicious_ip
6
- ip = "149.210.164.47"
8
+ ip = '149.210.164.47'
7
9
  match = Legitbot::Ahrefs.new ip
8
10
  assert !match.valid?, msg: "#{ip} is not a real Ahrefs IP"
9
11
  end
10
12
 
11
13
  def test_valid_ip
12
- ip = "54.36.148.0"
14
+ ip = '54.36.148.0'
13
15
  match = Legitbot::Ahrefs.new ip
14
16
  assert match.valid?, msg: "#{ip} is a valid Ahrefs IP"
15
17
  end
16
18
 
17
19
  def test_malicious_ua
18
- bot = Legitbot.bot("Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)", "149.210.164.47")
19
- assert bot, msg: "Ahrefs detected from User-Agent"
20
- assert !bot.valid?, msg: "Not a valid Ahrefs"
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Ahrefs detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Ahrefs'
21
26
  end
22
27
 
23
28
  def test_valid_ua
24
- bot = Legitbot.bot("Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)", "54.36.148.0")
25
- assert bot, msg: "Ahrefs detected from User-Agent"
26
- assert bot.valid?, msg: "Valid Ahrefs"
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
31
+ '54.36.148.0'
32
+ )
33
+ assert bot, msg: 'Ahrefs detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Ahrefs'
27
35
  end
28
36
  end
@@ -1,22 +1,27 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'minitest/autorun'
2
4
  require 'legitbot'
3
5
 
4
6
  class AppleAsGoogleTest < Minitest::Test
5
7
  def test_valid_ip
6
- ip = "17.58.98.60"
8
+ ip = '17.58.98.60'
7
9
  match = Legitbot::Apple_as_Google.new(ip)
8
10
  assert match.valid?, msg: "#{ip} is a valid Applebot IP"
9
11
  end
10
12
 
11
13
  def test_invalid_ip
12
- ip = "127.0.0.1"
14
+ ip = '127.0.0.1'
13
15
  match = Legitbot::Apple_as_Google.new(ip)
14
16
  assert match.fake?, msg: "#{ip} is a fake Applebot IP"
15
17
  end
16
18
 
17
19
  def test_user_agent
18
- bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "17.58.98.60")
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
22
+ '17.58.98.60'
23
+ )
19
24
  assert_equal :apple_as_google, bot.detected_as
20
- assert bot.valid?, msg: "A valid Applebot User-agent and IP"
25
+ assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
21
26
  end
22
27
  end
@@ -1,22 +1,29 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'minitest/autorun'
2
4
  require 'legitbot'
3
5
 
4
6
  class AppleTest < Minitest::Test
5
7
  def test_valid_ip
6
- ip = "17.58.98.60"
8
+ ip = '17.58.98.60'
7
9
  match = Legitbot::Apple.new(ip)
8
10
  assert match.valid?, msg: "#{ip} is a valid Applebot IP"
9
11
  end
10
12
 
11
13
  def test_invalid_ip
12
- ip = "127.0.0.1"
14
+ ip = '127.0.0.1'
13
15
  match = Legitbot::Apple.new(ip)
14
16
  assert match.fake?, msg: "#{ip} is a fake Applebot IP"
15
17
  end
16
18
 
19
+ # rubocop:disable Layout/LineLength
17
20
  def test_user_agent
18
- bot = Legitbot.bot("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)", "17.58.98.60")
21
+ bot = Legitbot.bot(
22
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)',
23
+ '17.58.98.60'
24
+ )
19
25
  assert_equal :apple, bot.detected_as
20
- assert bot.valid?, msg: "A valid Applebot User-agent and IP"
26
+ assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
21
27
  end
28
+ # rubocop:enable Layout/LineLength
22
29
  end
@@ -1,29 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'minitest/autorun'
2
4
  require 'legitbot'
3
5
 
4
6
  class BotMatchTest < Minitest::Test
5
- def test_reverse_name
6
- match = Legitbot::BotMatch.new "66.249.64.141"
7
- assert_equal "crawl-66-249-64-141.googlebot.com", match.reverse_name
8
- end
9
-
10
- def test_reverse_ip
11
- match = Legitbot::BotMatch.new "66.249.64.141"
12
- assert_equal "66.249.64.141", match.reversed_ip
13
- end
14
-
15
- def test_reverse_resolves
16
- match = Legitbot::BotMatch.new "66.249.64.141"
17
- assert_equal true, match.reverse_resolves?
18
- end
19
-
20
- def test_reverse_doesnt_resolve
21
- match = Legitbot::BotMatch.new "5.140.70.64"
22
- assert !match.reverse_resolves?
23
- end
24
-
25
7
  def test_valid_class_syntax
26
- assert Legitbot::Google.valid?("66.249.64.141"), msg: "Valid Googlebot"
27
- assert Legitbot::Google.fake?("149.210.164.47"), msg: "Fake Googlebot"
8
+ assert Legitbot::Google.valid?('66.249.64.141'), msg: 'Valid Googlebot'
9
+ assert Legitbot::Google.fake?('149.210.164.47'), msg: 'Fake Googlebot'
28
10
  end
29
11
  end