legitbot 0.4.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +60 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -7
- data/.ruby-version +1 -0
- data/README.md +8 -1
- data/legitbot.gemspec +19 -19
- data/lib/legitbot.rb +2 -0
- data/lib/legitbot/duckduckgo.rb +4 -1
- data/lib/legitbot/legitbot.rb +2 -0
- data/lib/legitbot/oracle.rb +10 -0
- data/lib/legitbot/twitter.rb +14 -0
- data/lib/legitbot/validators/ip_ranges.rb +11 -7
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot/yandex.rb +27 -14
- data/test/apple_test.rb +2 -2
- data/test/facebook_test.rb +5 -5
- data/test/legitbot/validators/ip_ranges_test.rb +9 -0
- data/test/oracle_test.rb +36 -0
- data/test/twitter_test.rb +36 -0
- metadata +68 -26
- data/.travis.yml +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1596ed8c3809fc3c3068f14d8bbbcf84232286fd7157fe724ba9515d195259cd
|
4
|
+
data.tar.gz: f0078d0404d752550adeeaa9c64954cbcec57fef3d0b37c98bc50d765c29bc1f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5640ae1e351bbd697325cd0bed0bdac45c3726e7b31923b7b5c8f51859ed025d4eb368d937a2b3bf9ef6c150d83418f8e019304dec3bc1b010572a4e1598c661
|
7
|
+
data.tar.gz: bf326da52d3adf1b2cfa3693b51cc763daef54c9b175cfb91b437b213722a0f578565db553f1240b6d6ee47d159bb418fdcc1db1f95cb14696b2bfb52e50b75d
|
@@ -0,0 +1,60 @@
|
|
1
|
+
name: build
|
2
|
+
|
3
|
+
on: [push]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
|
9
|
+
strategy:
|
10
|
+
fail-fast: false
|
11
|
+
matrix:
|
12
|
+
ruby: [ jruby, 2.6 ]
|
13
|
+
|
14
|
+
steps:
|
15
|
+
- uses: actions/checkout@v2
|
16
|
+
- name: Set up Ruby
|
17
|
+
uses: ruby/setup-ruby@v1
|
18
|
+
with:
|
19
|
+
ruby-version: ${{ matrix.ruby }}
|
20
|
+
- name: Cache dependencies
|
21
|
+
uses: actions/cache@v1
|
22
|
+
with:
|
23
|
+
path: vendor/bundle
|
24
|
+
key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
|
25
|
+
restore-keys: |
|
26
|
+
${{ runner.os }}-${{ matrix.ruby }}-gems-
|
27
|
+
- name: Install dependencies
|
28
|
+
run: |
|
29
|
+
bundle config path vendor/bundle
|
30
|
+
bundle install --jobs 4 --retry 3
|
31
|
+
- name: Run tests
|
32
|
+
run: bundle exec rake test
|
33
|
+
|
34
|
+
lint:
|
35
|
+
needs: test
|
36
|
+
runs-on: ubuntu-latest
|
37
|
+
|
38
|
+
strategy:
|
39
|
+
matrix:
|
40
|
+
ruby: [ 2.6 ]
|
41
|
+
|
42
|
+
steps:
|
43
|
+
- uses: actions/checkout@v2
|
44
|
+
- name: Set up Ruby
|
45
|
+
uses: ruby/setup-ruby@v1
|
46
|
+
with:
|
47
|
+
ruby-version: ${{ matrix.ruby }}
|
48
|
+
- name: Cache dependencies
|
49
|
+
uses: actions/cache@v1
|
50
|
+
with:
|
51
|
+
path: vendor/bundle
|
52
|
+
key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
|
53
|
+
restore-keys: |
|
54
|
+
${{ runner.os }}-${{ matrix.ruby }}-gems-
|
55
|
+
- name: Install dependencies
|
56
|
+
run: |
|
57
|
+
bundle config path vendor/bundle
|
58
|
+
bundle install --jobs 4 --retry 3
|
59
|
+
- name: Run linter
|
60
|
+
run: bundle exec rubocop
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Legitbot
|
1
|
+
# Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
|
2
2
|
|
3
3
|
Ruby gem to check that an IP belongs to a bot, typically a search
|
4
4
|
engine. This can be of help in protecting a web site from fake search
|
@@ -50,7 +50,9 @@ end
|
|
50
50
|
* [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
|
51
51
|
* [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
|
52
52
|
* [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
53
|
+
* [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
|
53
54
|
* [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
|
55
|
+
* [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
|
54
56
|
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
55
57
|
|
56
58
|
## License
|
@@ -65,3 +67,8 @@ Apache 2.0
|
|
65
67
|
detects bots by `User-Agent`
|
66
68
|
* [crawler_detect](https://github.com/loadkpi/crawler_detect) is a Ruby gem and Rack
|
67
69
|
middleware to detect crawlers by few different request headers, including `User-Agent`
|
70
|
+
* Project Honeypot's
|
71
|
+
[http:BL](https://www.projecthoneypot.org/httpbl_api.php) can not only
|
72
|
+
classify IP as a search engine, but also label them as suspicious and
|
73
|
+
reports the number of days since the last activity. My implementation of
|
74
|
+
the protocol in Scala is [here](https://github.com/osinka/httpbl).
|
data/legitbot.gemspec
CHANGED
@@ -1,28 +1,28 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
+
require 'legitbot/version'
|
4
5
|
|
5
6
|
Gem::Specification.new do |spec|
|
6
7
|
spec.name = 'legitbot'
|
7
8
|
spec.version = Legitbot::VERSION
|
8
9
|
spec.license = 'Apache-2.0'
|
9
10
|
|
10
|
-
spec.author =
|
11
|
-
spec.email =
|
12
|
-
spec.homepage =
|
13
|
-
spec.summary =
|
14
|
-
spec.description =
|
15
|
-
"made by a real search engine, not a malicious agent"
|
11
|
+
spec.author = 'Alexander Azarov'
|
12
|
+
spec.email = 'self@alaz.me'
|
13
|
+
spec.homepage = 'https://github.com/alaz/legitbot'
|
14
|
+
spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
|
15
|
+
spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
|
16
16
|
|
17
|
-
spec.required_ruby_version = '>= 2.
|
18
|
-
spec.add_dependency
|
19
|
-
spec.add_dependency
|
20
|
-
spec.add_development_dependency
|
21
|
-
spec.add_development_dependency
|
22
|
-
spec.add_development_dependency
|
23
|
-
spec.add_development_dependency
|
17
|
+
spec.required_ruby_version = '>= 2.4.0'
|
18
|
+
spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
|
19
|
+
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
20
|
+
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
21
|
+
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
22
|
+
spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
|
23
|
+
spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
|
24
24
|
|
25
|
-
spec.files = `git ls-files`.split(
|
26
|
-
spec.rdoc_options = [
|
27
|
-
spec.test_files = Dir.glob(
|
25
|
+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
|
+
spec.rdoc_options = ['--charset=UTF-8']
|
27
|
+
spec.test_files = Dir.glob('test/**/*')
|
28
28
|
end
|
data/lib/legitbot.rb
CHANGED
@@ -10,5 +10,7 @@ require_relative 'legitbot/bing'
|
|
10
10
|
require_relative 'legitbot/duckduckgo'
|
11
11
|
require_relative 'legitbot/facebook'
|
12
12
|
require_relative 'legitbot/google'
|
13
|
+
require_relative 'legitbot/oracle'
|
13
14
|
require_relative 'legitbot/pinterest'
|
15
|
+
require_relative 'legitbot/twitter'
|
14
16
|
require_relative 'legitbot/yandex'
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -4,6 +4,8 @@ module Legitbot # :nodoc:
|
|
4
4
|
# https://duckduckgo.com/duckduckbot
|
5
5
|
class DuckDuckGo < BotMatch
|
6
6
|
ip_ranges %w[
|
7
|
+
23.21.227.69
|
8
|
+
40.88.21.235
|
7
9
|
50.16.241.113
|
8
10
|
50.16.241.114
|
9
11
|
50.16.241.117
|
@@ -12,7 +14,8 @@ module Legitbot # :nodoc:
|
|
12
14
|
52.5.190.19
|
13
15
|
54.197.234.188
|
14
16
|
54.208.100.253
|
15
|
-
|
17
|
+
54.208.102.37
|
18
|
+
107.21.1.8
|
16
19
|
]
|
17
20
|
end
|
18
21
|
|
data/lib/legitbot/legitbot.rb
CHANGED
@@ -18,6 +18,7 @@ module Legitbot
|
|
18
18
|
# otherwise.
|
19
19
|
# :yields: a found bot
|
20
20
|
#
|
21
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
21
22
|
def self.bot(user_agent, ip)
|
22
23
|
bots = @rules
|
23
24
|
.select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
|
@@ -32,6 +33,7 @@ module Legitbot
|
|
32
33
|
selected
|
33
34
|
end
|
34
35
|
end
|
36
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
35
37
|
|
36
38
|
def self.rule(clazz, fragments)
|
37
39
|
@rules << { class: clazz, fragments: fragments }
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started
|
5
|
+
# https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards
|
6
|
+
class Twitter < BotMatch
|
7
|
+
ip_ranges %w[
|
8
|
+
199.16.156.0/22
|
9
|
+
199.59.148.0/22
|
10
|
+
]
|
11
|
+
end
|
12
|
+
|
13
|
+
rule Legitbot::Twitter, %w[Twitterbot]
|
14
|
+
end
|
@@ -59,22 +59,26 @@ module Legitbot
|
|
59
59
|
partition_ips(@ip_ranges_loader.call)
|
60
60
|
end
|
61
61
|
|
62
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
63
62
|
def partition_ips(ips)
|
64
|
-
return []
|
63
|
+
return [] unless ips&.any?
|
65
64
|
|
66
65
|
ips
|
67
66
|
.map { |cidr| IPAddr.new(cidr) }
|
68
67
|
.partition(&:ipv4?)
|
69
68
|
.each_with_index
|
70
69
|
.map do |list, index|
|
71
|
-
|
72
|
-
(r.begin.to_i..r.end.to_i)
|
73
|
-
end
|
74
|
-
[FAMILIES[index], IntervalTree::Tree.new(ranges)]
|
70
|
+
[FAMILIES[index], build_interval_tree(list)]
|
75
71
|
end.to_h
|
76
72
|
end
|
77
|
-
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
def build_interval_tree(list)
|
77
|
+
ranges = list.map(&:to_range).map do |r|
|
78
|
+
(r.begin.to_i..r.end.to_i)
|
79
|
+
end
|
80
|
+
IntervalTree::Tree.new(ranges)
|
81
|
+
end
|
78
82
|
end
|
79
83
|
end
|
80
84
|
end
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot/yandex.rb
CHANGED
@@ -7,27 +7,40 @@ module Legitbot # :nodoc:
|
|
7
7
|
end
|
8
8
|
|
9
9
|
rule Legitbot::Yandex, %w[
|
10
|
-
YandexBot
|
11
10
|
YandexAccessibilityBot
|
12
|
-
|
13
|
-
YandexDirectDyn
|
14
|
-
YandexScreenshotBot
|
15
|
-
YandexImages
|
16
|
-
YandexVideo
|
17
|
-
YandexVideoParser
|
18
|
-
YandexMedia
|
11
|
+
YandexAdNet
|
19
12
|
YandexBlogs
|
13
|
+
YandexBot/
|
14
|
+
YandexCalendar
|
15
|
+
YandexDirect/
|
16
|
+
YandexDirectDyn
|
20
17
|
YandexFavicons
|
21
|
-
YandexWebmaster
|
22
|
-
YandexPagechecker
|
23
|
-
YandexImageResizer
|
24
18
|
YaDirectFetcher
|
25
|
-
|
26
|
-
|
19
|
+
YandexForDomain
|
20
|
+
YandexImages
|
21
|
+
YandexImageResizer
|
22
|
+
YandexMobileBot
|
23
|
+
YandexMarket
|
24
|
+
YandexMedia
|
27
25
|
YandexMetrika
|
26
|
+
YandexMobileScreenShotBot
|
28
27
|
YandexNews
|
29
|
-
|
28
|
+
YandexOntoDB
|
29
|
+
YandexOntoDBAPI
|
30
|
+
YandexPagechecker
|
31
|
+
YandexPartner
|
32
|
+
YandexRCA
|
30
33
|
YandexSearchShop
|
34
|
+
YandexSitelinks
|
35
|
+
YandexSpravBot
|
36
|
+
YandexTracker
|
37
|
+
YandexTurbo
|
38
|
+
YandexVertis
|
31
39
|
YandexVerticals
|
40
|
+
YandexVideo
|
41
|
+
YandexVideoParser
|
42
|
+
YandexWebmaster
|
43
|
+
YandexScreenshotBot
|
44
|
+
YandexMedianaBot
|
32
45
|
]
|
33
46
|
end
|
data/test/apple_test.rb
CHANGED
@@ -16,7 +16,7 @@ class AppleTest < Minitest::Test
|
|
16
16
|
assert match.fake?, msg: "#{ip} is a fake Applebot IP"
|
17
17
|
end
|
18
18
|
|
19
|
-
# rubocop:disable
|
19
|
+
# rubocop:disable Layout/LineLength
|
20
20
|
def test_user_agent
|
21
21
|
bot = Legitbot.bot(
|
22
22
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)',
|
@@ -25,5 +25,5 @@ class AppleTest < Minitest::Test
|
|
25
25
|
assert_equal :apple, bot.detected_as
|
26
26
|
assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
|
27
27
|
end
|
28
|
-
# rubocop:enable
|
28
|
+
# rubocop:enable Layout/LineLength
|
29
29
|
end
|
data/test/facebook_test.rb
CHANGED
@@ -5,14 +5,14 @@ require 'legitbot'
|
|
5
5
|
|
6
6
|
module Legitbot
|
7
7
|
class Facebook
|
8
|
-
# rubocop:disable
|
8
|
+
# rubocop:disable Layout/LineLength
|
9
9
|
def self.whois
|
10
10
|
{
|
11
11
|
ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'],
|
12
12
|
ipv6: []
|
13
13
|
}
|
14
14
|
end
|
15
|
-
# rubocop:enable
|
15
|
+
# rubocop:enable Layout/LineLength
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
|
|
33
33
|
assert match.fake?, msg: "#{ip} is a fake Facebook IP"
|
34
34
|
end
|
35
35
|
|
36
|
-
# rubocop:disable Metrics/LineLength, Metrics/MethodLength
|
36
|
+
# rubocop:disable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
|
37
37
|
def test_user_agent
|
38
38
|
Legitbot.bot(
|
39
39
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
@@ -55,9 +55,9 @@ class FacebookTest < Minitest::Test
|
|
55
55
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0',
|
56
56
|
'92.243.181.7'
|
57
57
|
) do |bot|
|
58
|
-
|
58
|
+
assert %i[facebook twitter].include?(bot.detected_as)
|
59
59
|
assert bot.fake?, msg: 'fake Facebook'
|
60
60
|
end
|
61
61
|
end
|
62
|
-
# rubocop:enable Metrics/LineLength, Metrics/MethodLength
|
62
|
+
# rubocop:enable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
|
63
63
|
end
|
@@ -46,6 +46,11 @@ module Legitbot
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
+
class NilRanges
|
50
|
+
include IpRanges
|
51
|
+
ip_ranges { nil }
|
52
|
+
end
|
53
|
+
|
49
54
|
class IpRangesTest < Minitest::Test
|
50
55
|
def test_partition_method
|
51
56
|
empty = NoRanges.partition_ips([])
|
@@ -108,6 +113,10 @@ module Legitbot
|
|
108
113
|
assert_equal 2, LoadRanges.counter
|
109
114
|
end
|
110
115
|
# rubocop:enable Metrics/AbcSize
|
116
|
+
|
117
|
+
def test_nil_ranges
|
118
|
+
assert NilRanges.valid_ip?('127.0.0.1')
|
119
|
+
end
|
111
120
|
end
|
112
121
|
end
|
113
122
|
end
|
data/test/oracle_test.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class OracleTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Oracle.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Oracle IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '148.64.56.64'
|
15
|
+
match = Legitbot::Oracle.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Oracle IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Oracle detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Oracle'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
31
|
+
'148.64.56.64'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Oracle detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Oracle'
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class TwitterTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Twitter.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Twitter IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '199.16.156.125'
|
15
|
+
match = Legitbot::Twitter.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Twitter IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Twitterbot/1.0',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Twitter detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Twitter'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Twitterbot/1.0',
|
31
|
+
'199.16.156.125'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Twitter detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Twitter'
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,47 +1,62 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-07-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: augmented_interval_tree
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.1'
|
17
20
|
- - ">="
|
18
21
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
22
|
+
version: 0.1.1
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.1'
|
24
30
|
- - ">="
|
25
31
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
32
|
+
version: 0.1.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
34
|
+
name: irrc
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0.2'
|
31
40
|
- - ">="
|
32
41
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
42
|
+
version: 0.2.1
|
34
43
|
type: :runtime
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0.2'
|
38
50
|
- - ">="
|
39
51
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
52
|
+
version: 0.2.1
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: bump
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
44
56
|
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0.8'
|
45
60
|
- - ">="
|
46
61
|
- !ruby/object:Gem::Version
|
47
62
|
version: 0.8.0
|
@@ -49,61 +64,83 @@ dependencies:
|
|
49
64
|
prerelease: false
|
50
65
|
version_requirements: !ruby/object:Gem::Requirement
|
51
66
|
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.8'
|
52
70
|
- - ">="
|
53
71
|
- !ruby/object:Gem::Version
|
54
72
|
version: 0.8.0
|
55
73
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
74
|
+
name: minitest
|
57
75
|
requirement: !ruby/object:Gem::Requirement
|
58
76
|
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '5.1'
|
59
80
|
- - ">="
|
60
81
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
82
|
+
version: 5.1.0
|
62
83
|
type: :development
|
63
84
|
prerelease: false
|
64
85
|
version_requirements: !ruby/object:Gem::Requirement
|
65
86
|
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '5.1'
|
66
90
|
- - ">="
|
67
91
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
92
|
+
version: 5.1.0
|
69
93
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
94
|
+
name: rake
|
71
95
|
requirement: !ruby/object:Gem::Requirement
|
72
96
|
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '12.3'
|
73
100
|
- - ">="
|
74
101
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
102
|
+
version: 12.3.0
|
76
103
|
type: :development
|
77
104
|
prerelease: false
|
78
105
|
version_requirements: !ruby/object:Gem::Requirement
|
79
106
|
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '12.3'
|
80
110
|
- - ">="
|
81
111
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
112
|
+
version: 12.3.0
|
83
113
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
114
|
+
name: rubocop
|
85
115
|
requirement: !ruby/object:Gem::Requirement
|
86
116
|
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0.74'
|
87
120
|
- - ">="
|
88
121
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
122
|
+
version: 0.74.0
|
90
123
|
type: :development
|
91
124
|
prerelease: false
|
92
125
|
version_requirements: !ruby/object:Gem::Requirement
|
93
126
|
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0.74'
|
94
130
|
- - ">="
|
95
131
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
97
|
-
description:
|
98
|
-
|
132
|
+
version: 0.74.0
|
133
|
+
description: Does Web request come from a real search engine or from an impersonating
|
134
|
+
agent?
|
99
135
|
email: self@alaz.me
|
100
136
|
executables: []
|
101
137
|
extensions: []
|
102
138
|
extra_rdoc_files: []
|
103
139
|
files:
|
140
|
+
- ".github/workflows/build.yml"
|
104
141
|
- ".gitignore"
|
105
142
|
- ".rubocop.yml"
|
106
|
-
- ".
|
143
|
+
- ".ruby-version"
|
107
144
|
- Gemfile
|
108
145
|
- LICENSE.txt
|
109
146
|
- README.md
|
@@ -120,7 +157,9 @@ files:
|
|
120
157
|
- lib/legitbot/facebook.rb
|
121
158
|
- lib/legitbot/google.rb
|
122
159
|
- lib/legitbot/legitbot.rb
|
160
|
+
- lib/legitbot/oracle.rb
|
123
161
|
- lib/legitbot/pinterest.rb
|
162
|
+
- lib/legitbot/twitter.rb
|
124
163
|
- lib/legitbot/validators/domains.rb
|
125
164
|
- lib/legitbot/validators/ip_ranges.rb
|
126
165
|
- lib/legitbot/version.rb
|
@@ -134,12 +173,14 @@ files:
|
|
134
173
|
- test/legitbot/validators/domains_test.rb
|
135
174
|
- test/legitbot/validators/ip_ranges_test.rb
|
136
175
|
- test/legitbot_test.rb
|
176
|
+
- test/oracle_test.rb
|
137
177
|
- test/pinterest_test.rb
|
178
|
+
- test/twitter_test.rb
|
138
179
|
homepage: https://github.com/alaz/legitbot
|
139
180
|
licenses:
|
140
181
|
- Apache-2.0
|
141
182
|
metadata: {}
|
142
|
-
post_install_message:
|
183
|
+
post_install_message:
|
143
184
|
rdoc_options:
|
144
185
|
- "--charset=UTF-8"
|
145
186
|
require_paths:
|
@@ -148,16 +189,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
189
|
requirements:
|
149
190
|
- - ">="
|
150
191
|
- !ruby/object:Gem::Version
|
151
|
-
version: 2.
|
192
|
+
version: 2.4.0
|
152
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
153
194
|
requirements:
|
154
195
|
- - ">="
|
155
196
|
- !ruby/object:Gem::Version
|
156
197
|
version: '0'
|
157
198
|
requirements: []
|
158
|
-
|
159
|
-
|
160
|
-
signing_key:
|
199
|
+
rubygems_version: 3.1.2
|
200
|
+
signing_key:
|
161
201
|
specification_version: 4
|
162
202
|
summary: 'Validate requests from Web crawlers: impersonating or not?'
|
163
203
|
test_files:
|
@@ -168,6 +208,8 @@ test_files:
|
|
168
208
|
- test/ahrefs_test.rb
|
169
209
|
- test/apple_test.rb
|
170
210
|
- test/apple_as_google_test.rb
|
211
|
+
- test/oracle_test.rb
|
171
212
|
- test/google_test.rb
|
172
213
|
- test/botmatch_test.rb
|
173
214
|
- test/facebook_test.rb
|
215
|
+
- test/twitter_test.rb
|