legitbot 0.4.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +60 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -7
- data/.ruby-version +1 -0
- data/README.md +8 -1
- data/legitbot.gemspec +19 -19
- data/lib/legitbot.rb +2 -0
- data/lib/legitbot/duckduckgo.rb +4 -1
- data/lib/legitbot/legitbot.rb +2 -0
- data/lib/legitbot/oracle.rb +10 -0
- data/lib/legitbot/twitter.rb +14 -0
- data/lib/legitbot/validators/ip_ranges.rb +11 -7
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot/yandex.rb +27 -14
- data/test/apple_test.rb +2 -2
- data/test/facebook_test.rb +5 -5
- data/test/legitbot/validators/ip_ranges_test.rb +9 -0
- data/test/oracle_test.rb +36 -0
- data/test/twitter_test.rb +36 -0
- metadata +68 -26
- data/.travis.yml +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1596ed8c3809fc3c3068f14d8bbbcf84232286fd7157fe724ba9515d195259cd
|
4
|
+
data.tar.gz: f0078d0404d752550adeeaa9c64954cbcec57fef3d0b37c98bc50d765c29bc1f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5640ae1e351bbd697325cd0bed0bdac45c3726e7b31923b7b5c8f51859ed025d4eb368d937a2b3bf9ef6c150d83418f8e019304dec3bc1b010572a4e1598c661
|
7
|
+
data.tar.gz: bf326da52d3adf1b2cfa3693b51cc763daef54c9b175cfb91b437b213722a0f578565db553f1240b6d6ee47d159bb418fdcc1db1f95cb14696b2bfb52e50b75d
|
data/.github/workflows/build.yml
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
name: build
|
2
|
+
|
3
|
+
on: [push]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
|
9
|
+
strategy:
|
10
|
+
fail-fast: false
|
11
|
+
matrix:
|
12
|
+
ruby: [ jruby, 2.6 ]
|
13
|
+
|
14
|
+
steps:
|
15
|
+
- uses: actions/checkout@v2
|
16
|
+
- name: Set up Ruby
|
17
|
+
uses: ruby/setup-ruby@v1
|
18
|
+
with:
|
19
|
+
ruby-version: ${{ matrix.ruby }}
|
20
|
+
- name: Cache dependencies
|
21
|
+
uses: actions/cache@v1
|
22
|
+
with:
|
23
|
+
path: vendor/bundle
|
24
|
+
key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
|
25
|
+
restore-keys: |
|
26
|
+
${{ runner.os }}-${{ matrix.ruby }}-gems-
|
27
|
+
- name: Install dependencies
|
28
|
+
run: |
|
29
|
+
bundle config path vendor/bundle
|
30
|
+
bundle install --jobs 4 --retry 3
|
31
|
+
- name: Run tests
|
32
|
+
run: bundle exec rake test
|
33
|
+
|
34
|
+
lint:
|
35
|
+
needs: test
|
36
|
+
runs-on: ubuntu-latest
|
37
|
+
|
38
|
+
strategy:
|
39
|
+
matrix:
|
40
|
+
ruby: [ 2.6 ]
|
41
|
+
|
42
|
+
steps:
|
43
|
+
- uses: actions/checkout@v2
|
44
|
+
- name: Set up Ruby
|
45
|
+
uses: ruby/setup-ruby@v1
|
46
|
+
with:
|
47
|
+
ruby-version: ${{ matrix.ruby }}
|
48
|
+
- name: Cache dependencies
|
49
|
+
uses: actions/cache@v1
|
50
|
+
with:
|
51
|
+
path: vendor/bundle
|
52
|
+
key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
|
53
|
+
restore-keys: |
|
54
|
+
${{ runner.os }}-${{ matrix.ruby }}-gems-
|
55
|
+
- name: Install dependencies
|
56
|
+
run: |
|
57
|
+
bundle config path vendor/bundle
|
58
|
+
bundle install --jobs 4 --retry 3
|
59
|
+
- name: Run linter
|
60
|
+
run: bundle exec rubocop
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Legitbot
|
1
|
+
# Legitbot  [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
|
2
2
|
|
3
3
|
Ruby gem to check that an IP belongs to a bot, typically a search
|
4
4
|
engine. This can be of help in protecting a web site from fake search
|
@@ -50,7 +50,9 @@ end
|
|
50
50
|
* [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
|
51
51
|
* [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
|
52
52
|
* [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
53
|
+
* [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
|
53
54
|
* [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
|
55
|
+
* [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
|
54
56
|
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
55
57
|
|
56
58
|
## License
|
@@ -65,3 +67,8 @@ Apache 2.0
|
|
65
67
|
detects bots by `User-Agent`
|
66
68
|
* [crawler_detect](https://github.com/loadkpi/crawler_detect) is a Ruby gem and Rack
|
67
69
|
middleware to detect crawlers by few different request headers, including `User-Agent`
|
70
|
+
* Project Honeypot's
|
71
|
+
[http:BL](https://www.projecthoneypot.org/httpbl_api.php) can not only
|
72
|
+
classify IP as a search engine, but also label them as suspicious and
|
73
|
+
reports the number of days since the last activity. My implementation of
|
74
|
+
the protocol in Scala is [here](https://github.com/osinka/httpbl).
|
data/legitbot.gemspec
CHANGED
@@ -1,28 +1,28 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
+
require 'legitbot/version'
|
4
5
|
|
5
6
|
Gem::Specification.new do |spec|
|
6
7
|
spec.name = 'legitbot'
|
7
8
|
spec.version = Legitbot::VERSION
|
8
9
|
spec.license = 'Apache-2.0'
|
9
10
|
|
10
|
-
spec.author =
|
11
|
-
spec.email =
|
12
|
-
spec.homepage =
|
13
|
-
spec.summary =
|
14
|
-
spec.description =
|
15
|
-
"made by a real search engine, not a malicious agent"
|
11
|
+
spec.author = 'Alexander Azarov'
|
12
|
+
spec.email = 'self@alaz.me'
|
13
|
+
spec.homepage = 'https://github.com/alaz/legitbot'
|
14
|
+
spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
|
15
|
+
spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
|
16
16
|
|
17
|
-
spec.required_ruby_version = '>= 2.
|
18
|
-
spec.add_dependency
|
19
|
-
spec.add_dependency
|
20
|
-
spec.add_development_dependency
|
21
|
-
spec.add_development_dependency
|
22
|
-
spec.add_development_dependency
|
23
|
-
spec.add_development_dependency
|
17
|
+
spec.required_ruby_version = '>= 2.4.0'
|
18
|
+
spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
|
19
|
+
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
20
|
+
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
21
|
+
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
22
|
+
spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
|
23
|
+
spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
|
24
24
|
|
25
|
-
spec.files = `git ls-files`.split(
|
26
|
-
spec.rdoc_options = [
|
27
|
-
spec.test_files = Dir.glob(
|
25
|
+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
|
+
spec.rdoc_options = ['--charset=UTF-8']
|
27
|
+
spec.test_files = Dir.glob('test/**/*')
|
28
28
|
end
|
data/lib/legitbot.rb
CHANGED
@@ -10,5 +10,7 @@ require_relative 'legitbot/bing'
|
|
10
10
|
require_relative 'legitbot/duckduckgo'
|
11
11
|
require_relative 'legitbot/facebook'
|
12
12
|
require_relative 'legitbot/google'
|
13
|
+
require_relative 'legitbot/oracle'
|
13
14
|
require_relative 'legitbot/pinterest'
|
15
|
+
require_relative 'legitbot/twitter'
|
14
16
|
require_relative 'legitbot/yandex'
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -4,6 +4,8 @@ module Legitbot # :nodoc:
|
|
4
4
|
# https://duckduckgo.com/duckduckbot
|
5
5
|
class DuckDuckGo < BotMatch
|
6
6
|
ip_ranges %w[
|
7
|
+
23.21.227.69
|
8
|
+
40.88.21.235
|
7
9
|
50.16.241.113
|
8
10
|
50.16.241.114
|
9
11
|
50.16.241.117
|
@@ -12,7 +14,8 @@ module Legitbot # :nodoc:
|
|
12
14
|
52.5.190.19
|
13
15
|
54.197.234.188
|
14
16
|
54.208.100.253
|
15
|
-
|
17
|
+
54.208.102.37
|
18
|
+
107.21.1.8
|
16
19
|
]
|
17
20
|
end
|
18
21
|
|
data/lib/legitbot/legitbot.rb
CHANGED
@@ -18,6 +18,7 @@ module Legitbot
|
|
18
18
|
# otherwise.
|
19
19
|
# :yields: a found bot
|
20
20
|
#
|
21
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
21
22
|
def self.bot(user_agent, ip)
|
22
23
|
bots = @rules
|
23
24
|
.select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
|
@@ -32,6 +33,7 @@ module Legitbot
|
|
32
33
|
selected
|
33
34
|
end
|
34
35
|
end
|
36
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
35
37
|
|
36
38
|
def self.rule(clazz, fragments)
|
37
39
|
@rules << { class: clazz, fragments: fragments }
|
data/lib/legitbot/oracle.rb
ADDED
data/lib/legitbot/twitter.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started
|
5
|
+
# https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards
|
6
|
+
class Twitter < BotMatch
|
7
|
+
ip_ranges %w[
|
8
|
+
199.16.156.0/22
|
9
|
+
199.59.148.0/22
|
10
|
+
]
|
11
|
+
end
|
12
|
+
|
13
|
+
rule Legitbot::Twitter, %w[Twitterbot]
|
14
|
+
end
|
data/lib/legitbot/validators/ip_ranges.rb
CHANGED
@@ -59,22 +59,26 @@ module Legitbot
|
|
59
59
|
partition_ips(@ip_ranges_loader.call)
|
60
60
|
end
|
61
61
|
|
62
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
63
62
|
def partition_ips(ips)
|
64
|
-
return []
|
63
|
+
return [] unless ips&.any?
|
65
64
|
|
66
65
|
ips
|
67
66
|
.map { |cidr| IPAddr.new(cidr) }
|
68
67
|
.partition(&:ipv4?)
|
69
68
|
.each_with_index
|
70
69
|
.map do |list, index|
|
71
|
-
|
72
|
-
(r.begin.to_i..r.end.to_i)
|
73
|
-
end
|
74
|
-
[FAMILIES[index], IntervalTree::Tree.new(ranges)]
|
70
|
+
[FAMILIES[index], build_interval_tree(list)]
|
75
71
|
end.to_h
|
76
72
|
end
|
77
|
-
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
def build_interval_tree(list)
|
77
|
+
ranges = list.map(&:to_range).map do |r|
|
78
|
+
(r.begin.to_i..r.end.to_i)
|
79
|
+
end
|
80
|
+
IntervalTree::Tree.new(ranges)
|
81
|
+
end
|
78
82
|
end
|
79
83
|
end
|
80
84
|
end
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot/yandex.rb
CHANGED
@@ -7,27 +7,40 @@ module Legitbot # :nodoc:
|
|
7
7
|
end
|
8
8
|
|
9
9
|
rule Legitbot::Yandex, %w[
|
10
|
-
YandexBot
|
11
10
|
YandexAccessibilityBot
|
12
|
-
|
13
|
-
YandexDirectDyn
|
14
|
-
YandexScreenshotBot
|
15
|
-
YandexImages
|
16
|
-
YandexVideo
|
17
|
-
YandexVideoParser
|
18
|
-
YandexMedia
|
11
|
+
YandexAdNet
|
19
12
|
YandexBlogs
|
13
|
+
YandexBot/
|
14
|
+
YandexCalendar
|
15
|
+
YandexDirect/
|
16
|
+
YandexDirectDyn
|
20
17
|
YandexFavicons
|
21
|
-
YandexWebmaster
|
22
|
-
YandexPagechecker
|
23
|
-
YandexImageResizer
|
24
18
|
YaDirectFetcher
|
25
|
-
|
26
|
-
|
19
|
+
YandexForDomain
|
20
|
+
YandexImages
|
21
|
+
YandexImageResizer
|
22
|
+
YandexMobileBot
|
23
|
+
YandexMarket
|
24
|
+
YandexMedia
|
27
25
|
YandexMetrika
|
26
|
+
YandexMobileScreenShotBot
|
28
27
|
YandexNews
|
29
|
-
|
28
|
+
YandexOntoDB
|
29
|
+
YandexOntoDBAPI
|
30
|
+
YandexPagechecker
|
31
|
+
YandexPartner
|
32
|
+
YandexRCA
|
30
33
|
YandexSearchShop
|
34
|
+
YandexSitelinks
|
35
|
+
YandexSpravBot
|
36
|
+
YandexTracker
|
37
|
+
YandexTurbo
|
38
|
+
YandexVertis
|
31
39
|
YandexVerticals
|
40
|
+
YandexVideo
|
41
|
+
YandexVideoParser
|
42
|
+
YandexWebmaster
|
43
|
+
YandexScreenshotBot
|
44
|
+
YandexMedianaBot
|
32
45
|
]
|
33
46
|
end
|
data/test/apple_test.rb
CHANGED
@@ -16,7 +16,7 @@ class AppleTest < Minitest::Test
|
|
16
16
|
assert match.fake?, msg: "#{ip} is a fake Applebot IP"
|
17
17
|
end
|
18
18
|
|
19
|
-
# rubocop:disable
|
19
|
+
# rubocop:disable Layout/LineLength
|
20
20
|
def test_user_agent
|
21
21
|
bot = Legitbot.bot(
|
22
22
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)',
|
@@ -25,5 +25,5 @@ class AppleTest < Minitest::Test
|
|
25
25
|
assert_equal :apple, bot.detected_as
|
26
26
|
assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
|
27
27
|
end
|
28
|
-
# rubocop:enable
|
28
|
+
# rubocop:enable Layout/LineLength
|
29
29
|
end
|
data/test/facebook_test.rb
CHANGED
@@ -5,14 +5,14 @@ require 'legitbot'
|
|
5
5
|
|
6
6
|
module Legitbot
|
7
7
|
class Facebook
|
8
|
-
# rubocop:disable
|
8
|
+
# rubocop:disable Layout/LineLength
|
9
9
|
def self.whois
|
10
10
|
{
|
11
11
|
ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'],
|
12
12
|
ipv6: []
|
13
13
|
}
|
14
14
|
end
|
15
|
-
# rubocop:enable
|
15
|
+
# rubocop:enable Layout/LineLength
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
|
|
33
33
|
assert match.fake?, msg: "#{ip} is a fake Facebook IP"
|
34
34
|
end
|
35
35
|
|
36
|
-
# rubocop:disable Metrics/LineLength, Metrics/MethodLength
|
36
|
+
# rubocop:disable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
|
37
37
|
def test_user_agent
|
38
38
|
Legitbot.bot(
|
39
39
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
@@ -55,9 +55,9 @@ class FacebookTest < Minitest::Test
|
|
55
55
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0',
|
56
56
|
'92.243.181.7'
|
57
57
|
) do |bot|
|
58
|
-
|
58
|
+
assert %i[facebook twitter].include?(bot.detected_as)
|
59
59
|
assert bot.fake?, msg: 'fake Facebook'
|
60
60
|
end
|
61
61
|
end
|
62
|
-
# rubocop:enable Metrics/LineLength, Metrics/MethodLength
|
62
|
+
# rubocop:enable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
|
63
63
|
end
|
data/test/legitbot/validators/ip_ranges_test.rb
CHANGED
@@ -46,6 +46,11 @@ module Legitbot
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
+
class NilRanges
|
50
|
+
include IpRanges
|
51
|
+
ip_ranges { nil }
|
52
|
+
end
|
53
|
+
|
49
54
|
class IpRangesTest < Minitest::Test
|
50
55
|
def test_partition_method
|
51
56
|
empty = NoRanges.partition_ips([])
|
@@ -108,6 +113,10 @@ module Legitbot
|
|
108
113
|
assert_equal 2, LoadRanges.counter
|
109
114
|
end
|
110
115
|
# rubocop:enable Metrics/AbcSize
|
116
|
+
|
117
|
+
def test_nil_ranges
|
118
|
+
assert NilRanges.valid_ip?('127.0.0.1')
|
119
|
+
end
|
111
120
|
end
|
112
121
|
end
|
113
122
|
end
|
data/test/oracle_test.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class OracleTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Oracle.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Oracle IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '148.64.56.64'
|
15
|
+
match = Legitbot::Oracle.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Oracle IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Oracle detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Oracle'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
31
|
+
'148.64.56.64'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Oracle detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Oracle'
|
35
|
+
end
|
36
|
+
end
|
data/test/twitter_test.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class TwitterTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Twitter.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Twitter IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '199.16.156.125'
|
15
|
+
match = Legitbot::Twitter.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Twitter IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Twitterbot/1.0',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Twitter detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Twitter'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Twitterbot/1.0',
|
31
|
+
'199.16.156.125'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Twitter detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Twitter'
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,47 +1,62 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-07-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: augmented_interval_tree
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.1'
|
17
20
|
- - ">="
|
18
21
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
22
|
+
version: 0.1.1
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.1'
|
24
30
|
- - ">="
|
25
31
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
32
|
+
version: 0.1.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
34
|
+
name: irrc
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0.2'
|
31
40
|
- - ">="
|
32
41
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
42
|
+
version: 0.2.1
|
34
43
|
type: :runtime
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0.2'
|
38
50
|
- - ">="
|
39
51
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
52
|
+
version: 0.2.1
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: bump
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
44
56
|
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0.8'
|
45
60
|
- - ">="
|
46
61
|
- !ruby/object:Gem::Version
|
47
62
|
version: 0.8.0
|
@@ -49,61 +64,83 @@ dependencies:
|
|
49
64
|
prerelease: false
|
50
65
|
version_requirements: !ruby/object:Gem::Requirement
|
51
66
|
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.8'
|
52
70
|
- - ">="
|
53
71
|
- !ruby/object:Gem::Version
|
54
72
|
version: 0.8.0
|
55
73
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
74
|
+
name: minitest
|
57
75
|
requirement: !ruby/object:Gem::Requirement
|
58
76
|
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '5.1'
|
59
80
|
- - ">="
|
60
81
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
82
|
+
version: 5.1.0
|
62
83
|
type: :development
|
63
84
|
prerelease: false
|
64
85
|
version_requirements: !ruby/object:Gem::Requirement
|
65
86
|
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '5.1'
|
66
90
|
- - ">="
|
67
91
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
92
|
+
version: 5.1.0
|
69
93
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
94
|
+
name: rake
|
71
95
|
requirement: !ruby/object:Gem::Requirement
|
72
96
|
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '12.3'
|
73
100
|
- - ">="
|
74
101
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
102
|
+
version: 12.3.0
|
76
103
|
type: :development
|
77
104
|
prerelease: false
|
78
105
|
version_requirements: !ruby/object:Gem::Requirement
|
79
106
|
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '12.3'
|
80
110
|
- - ">="
|
81
111
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
112
|
+
version: 12.3.0
|
83
113
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
114
|
+
name: rubocop
|
85
115
|
requirement: !ruby/object:Gem::Requirement
|
86
116
|
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0.74'
|
87
120
|
- - ">="
|
88
121
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
122
|
+
version: 0.74.0
|
90
123
|
type: :development
|
91
124
|
prerelease: false
|
92
125
|
version_requirements: !ruby/object:Gem::Requirement
|
93
126
|
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0.74'
|
94
130
|
- - ">="
|
95
131
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
97
|
-
description:
|
98
|
-
|
132
|
+
version: 0.74.0
|
133
|
+
description: Does Web request come from a real search engine or from an impersonating
|
134
|
+
agent?
|
99
135
|
email: self@alaz.me
|
100
136
|
executables: []
|
101
137
|
extensions: []
|
102
138
|
extra_rdoc_files: []
|
103
139
|
files:
|
140
|
+
- ".github/workflows/build.yml"
|
104
141
|
- ".gitignore"
|
105
142
|
- ".rubocop.yml"
|
106
|
-
- ".
|
143
|
+
- ".ruby-version"
|
107
144
|
- Gemfile
|
108
145
|
- LICENSE.txt
|
109
146
|
- README.md
|
@@ -120,7 +157,9 @@ files:
|
|
120
157
|
- lib/legitbot/facebook.rb
|
121
158
|
- lib/legitbot/google.rb
|
122
159
|
- lib/legitbot/legitbot.rb
|
160
|
+
- lib/legitbot/oracle.rb
|
123
161
|
- lib/legitbot/pinterest.rb
|
162
|
+
- lib/legitbot/twitter.rb
|
124
163
|
- lib/legitbot/validators/domains.rb
|
125
164
|
- lib/legitbot/validators/ip_ranges.rb
|
126
165
|
- lib/legitbot/version.rb
|
@@ -134,12 +173,14 @@ files:
|
|
134
173
|
- test/legitbot/validators/domains_test.rb
|
135
174
|
- test/legitbot/validators/ip_ranges_test.rb
|
136
175
|
- test/legitbot_test.rb
|
176
|
+
- test/oracle_test.rb
|
137
177
|
- test/pinterest_test.rb
|
178
|
+
- test/twitter_test.rb
|
138
179
|
homepage: https://github.com/alaz/legitbot
|
139
180
|
licenses:
|
140
181
|
- Apache-2.0
|
141
182
|
metadata: {}
|
142
|
-
post_install_message:
|
183
|
+
post_install_message:
|
143
184
|
rdoc_options:
|
144
185
|
- "--charset=UTF-8"
|
145
186
|
require_paths:
|
@@ -148,16 +189,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
189
|
requirements:
|
149
190
|
- - ">="
|
150
191
|
- !ruby/object:Gem::Version
|
151
|
-
version: 2.
|
192
|
+
version: 2.4.0
|
152
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
153
194
|
requirements:
|
154
195
|
- - ">="
|
155
196
|
- !ruby/object:Gem::Version
|
156
197
|
version: '0'
|
157
198
|
requirements: []
|
158
|
-
|
159
|
-
|
160
|
-
signing_key:
|
199
|
+
rubygems_version: 3.1.2
|
200
|
+
signing_key:
|
161
201
|
specification_version: 4
|
162
202
|
summary: 'Validate requests from Web crawlers: impersonating or not?'
|
163
203
|
test_files:
|
@@ -168,6 +208,8 @@ test_files:
|
|
168
208
|
- test/ahrefs_test.rb
|
169
209
|
- test/apple_test.rb
|
170
210
|
- test/apple_as_google_test.rb
|
211
|
+
- test/oracle_test.rb
|
171
212
|
- test/google_test.rb
|
172
213
|
- test/botmatch_test.rb
|
173
214
|
- test/facebook_test.rb
|
215
|
+
- test/twitter_test.rb
|