legitbot 0.4.3 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8fda0548715a5620a167c849abfaccfc67685c4f214f55247203a947faa7ea83
4
- data.tar.gz: 2e60c4af14dbd7ac880850de0d4bb16a29936108b914860a0a00c4e9390d5bb9
3
+ metadata.gz: 6ddea94625712a0c61cf282d9e0cd16959e6af3ded7a9ac68afdd06ea12bfaef
4
+ data.tar.gz: 53964dca1f97a0814f1a1f01ef4d26b42caec804fdfce64f5db84127d9e13e71
5
5
  SHA512:
6
- metadata.gz: 5f2a3ab613d87e13a014778ef95a6780732251a28c4e74eaf823c4b292598cfe3a41d36db481977a8ce7cfd6c8206ab55a8ac19300949344792e3b5d339380e6
7
- data.tar.gz: 8ab7f3d7bc3e315fc485612b0f2c05e65205df2107f46390ca34a323960e470a6a3bd4b2086367738b6126af7038d725121086f45d77b9b6d635fae9a4e643dd
6
+ metadata.gz: 266e1b0c21f8180883390b40ef1e7907fc8756a66c52c0d05cb08d7d82ef29e807af1225b242b6c5745e1d69cebc1bd28fc3a8aa721ab989c614814d05f5bf7c
7
+ data.tar.gz: 4f424125b72d9e5efb91539d0f3c783b3c79d82720fdcbaa55f63ea31855ac3e1a677c2b48ce62c0a83d8354690ce358638797e1f6c207f224a2522a3c517c5b
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ end_of_line = lf
5
+ insert_final_newline = true
6
+ trim_trailing_whitespace = true
7
+ charset = utf-8
8
+
9
+ indent_style = space
10
+ indent_size = 2
11
+
12
+ [*.md]
13
+ trim_trailing_whitespace = false
14
+
15
+ [*.yml]
16
+ indent_style = space
17
+ indent_size = 2
@@ -0,0 +1,60 @@
1
+ name: build
2
+
3
+ on: [pull_request, push]
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+
9
+ strategy:
10
+ fail-fast: false
11
+ matrix:
12
+ ruby: [ jruby, 2.6 ]
13
+
14
+ steps:
15
+ - uses: actions/checkout@v2
16
+ - name: Set up Ruby
17
+ uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: ${{ matrix.ruby }}
20
+ - name: Cache dependencies
21
+ uses: actions/cache@v1
22
+ with:
23
+ path: vendor/bundle
24
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
25
+ restore-keys: |
26
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
27
+ - name: Install dependencies
28
+ run: |
29
+ bundle config path vendor/bundle
30
+ bundle install --jobs 4 --retry 3
31
+ - name: Run tests
32
+ run: bundle exec rake test
33
+
34
+ lint:
35
+ needs: test
36
+ runs-on: ubuntu-latest
37
+
38
+ strategy:
39
+ matrix:
40
+ ruby: [ 2.6 ]
41
+
42
+ steps:
43
+ - uses: actions/checkout@v2
44
+ - name: Set up Ruby
45
+ uses: ruby/setup-ruby@v1
46
+ with:
47
+ ruby-version: ${{ matrix.ruby }}
48
+ - name: Cache dependencies
49
+ uses: actions/cache@v1
50
+ with:
51
+ path: vendor/bundle
52
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
53
+ restore-keys: |
54
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
55
+ - name: Install dependencies
56
+ run: |
57
+ bundle config path vendor/bundle
58
+ bundle install --jobs 4 --retry 3
59
+ - name: Run linter
60
+ run: bundle exec rubocop
data/.gitignore CHANGED
@@ -4,3 +4,4 @@ Gemfile.lock
4
4
  *.gemfile.lock
5
5
  /pkg
6
6
  /tags
7
+ /vendor
@@ -1,9 +1,3 @@
1
1
  AllCops:
2
- Include:
3
- - '**/*.gemspec'
4
- - '**/Gemfile'
5
- - '**/Rakefile'
6
- - 'lib/**/*.rb'
7
- - 'test/**/*.rb'
8
- Exclude:
9
- - 'pkg/**'
2
+ CacheRootDirectory: 'vendor'
3
+ NewCops: enable
@@ -0,0 +1 @@
1
+ 2.4
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Legitbot [![Build Status](https://secure.travis-ci.org/alaz/legitbot.png?branch=master)](http://travis-ci.org/alaz/legitbot) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
1
+ # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
2
2
 
3
3
  Ruby gem to check that an IP belongs to a bot, typically a search
4
4
  engine. This can be of help in protecting a web site from fake search
@@ -44,6 +44,7 @@ end
44
44
  ## Supported
45
45
 
46
46
  * [Ahrefs](https://ahrefs.com/robot)
47
+ * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
47
48
  * [Applebot](https://support.apple.com/en-us/HT204683)
48
49
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
49
50
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -67,3 +68,8 @@ Apache 2.0
67
68
  detects bots by `User-Agent`
68
69
  * [crawler_detect](https://github.com/loadkpi/crawler_detect) is a Ruby gem and Rack
69
70
  middleware to detect crawlers by few different request headers, including `User-Agent`
71
+ * Project Honeypot's
72
+ [http:BL](https://www.projecthoneypot.org/httpbl_api.php) can not only
73
+ classify an IP as a search engine, but also label it as suspicious and
74
+ report the number of days since the last activity. My implementation of
75
+ the protocol in Scala is [here](https://github.com/osinka/httpbl).
@@ -14,13 +14,13 @@ Gem::Specification.new do |spec|
14
14
  spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
15
15
  spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
16
16
 
17
- spec.required_ruby_version = '>= 2.3.0'
17
+ spec.required_ruby_version = '>= 2.4.0'
18
18
  spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
19
19
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
22
  spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
23
- spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
23
+ spec.add_development_dependency 'rubocop', '~> 0.90', '< 0.91'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
@@ -4,6 +4,7 @@ require_relative 'legitbot/legitbot'
4
4
  require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
+ require_relative 'legitbot/alexa'
7
8
  require_relative 'legitbot/apple'
8
9
  require_relative 'legitbot/baidu'
9
10
  require_relative 'legitbot/bing'
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-
5
+ # https://support.alexa.com/hc/en-us/articles/200462340
6
+ # https://support.alexa.com/hc/en-us/articles/200450194
7
+ class Alexa < BotMatch
8
+ ip_ranges %w[
9
+ 52.86.176.3
10
+ 52.4.48.181
11
+ 52.2.182.169
12
+ 52.86.185.29
13
+ ]
14
+ end
15
+
16
+ rule Legitbot::Alexa, %w[Alexabot ia_archiver]
17
+ end
@@ -18,6 +18,7 @@ module Legitbot
18
18
  # otherwise.
19
19
  # :yields: a found bot
20
20
  #
21
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
21
22
  def self.bot(user_agent, ip)
22
23
  bots = @rules
23
24
  .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
@@ -32,6 +33,7 @@ module Legitbot
32
33
  selected
33
34
  end
34
35
  end
36
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
35
37
 
36
38
  def self.rule(clazz, fragments)
37
39
  @rules << { class: clazz, fragments: fragments }
@@ -40,7 +40,7 @@ module Legitbot
40
40
 
41
41
  obj = IPAddr.new(ip)
42
42
  ranges = valid_ips[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i)
43
- !ranges.empty?
43
+ !ranges.nil? && !ranges.empty?
44
44
  end
45
45
 
46
46
  def valid_ips
@@ -59,22 +59,26 @@ module Legitbot
59
59
  partition_ips(@ip_ranges_loader.call)
60
60
  end
61
61
 
62
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
63
62
  def partition_ips(ips)
64
- return [] if ips.empty?
63
+ return [] unless ips&.any?
65
64
 
66
65
  ips
67
66
  .map { |cidr| IPAddr.new(cidr) }
68
67
  .partition(&:ipv4?)
69
68
  .each_with_index
70
69
  .map do |list, index|
71
- ranges = list.map(&:to_range).map do |r|
72
- (r.begin.to_i..r.end.to_i)
73
- end
74
- [FAMILIES[index], IntervalTree::Tree.new(ranges)]
70
+ [FAMILIES[index], build_interval_tree(list)]
75
71
  end.to_h
76
72
  end
77
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
73
+
74
+ private
75
+
76
+ def build_interval_tree(list)
77
+ ranges = list.map(&:to_range).map do |r|
78
+ (r.begin.to_i..r.end.to_i)
79
+ end
80
+ IntervalTree::Tree.new(ranges)
81
+ end
78
82
  end
79
83
  end
80
84
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '0.4.3'
4
+ VERSION = '1.1.1'
5
5
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class AlexaTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Alexa.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Alexa IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '52.86.176.3'
15
+ match = Legitbot::Alexa.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Alexa IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Alexa detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Alexa'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
31
+ '52.86.176.3'
32
+ )
33
+ assert bot, msg: 'Alexa detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Alexa'
35
+ end
36
+ end
@@ -16,7 +16,7 @@ class AppleTest < Minitest::Test
16
16
  assert match.fake?, msg: "#{ip} is a fake Applebot IP"
17
17
  end
18
18
 
19
- # rubocop:disable Metrics/LineLength
19
+ # rubocop:disable Layout/LineLength
20
20
  def test_user_agent
21
21
  bot = Legitbot.bot(
22
22
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)',
@@ -25,5 +25,5 @@ class AppleTest < Minitest::Test
25
25
  assert_equal :apple, bot.detected_as
26
26
  assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
27
27
  end
28
- # rubocop:enable Metrics/LineLength
28
+ # rubocop:enable Layout/LineLength
29
29
  end
@@ -5,14 +5,14 @@ require 'legitbot'
5
5
 
6
6
  module Legitbot
7
7
  class Facebook
8
- # rubocop:disable Metrics/LineLength
8
+ # rubocop:disable Layout/LineLength
9
9
  def self.whois
10
10
  {
11
11
  ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'],
12
12
  ipv6: []
13
13
  }
14
14
  end
15
- # rubocop:enable Metrics/LineLength
15
+ # rubocop:enable Layout/LineLength
16
16
  end
17
17
  end
18
18
 
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
33
33
  assert match.fake?, msg: "#{ip} is a fake Facebook IP"
34
34
  end
35
35
 
36
- # rubocop:disable Metrics/LineLength, Metrics/MethodLength
36
+ # rubocop:disable Layout/LineLength, Metrics/MethodLength
37
37
  def test_user_agent
38
38
  Legitbot.bot(
39
39
  'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
@@ -59,5 +59,5 @@ class FacebookTest < Minitest::Test
59
59
  assert bot.fake?, msg: 'fake Facebook'
60
60
  end
61
61
  end
62
- # rubocop:enable Metrics/LineLength, Metrics/MethodLength
62
+ # rubocop:enable Layout/LineLength, Metrics/MethodLength
63
63
  end
@@ -46,6 +46,16 @@ module Legitbot
46
46
  end
47
47
  end
48
48
 
49
+ class NilRanges
50
+ include IpRanges
51
+ ip_ranges { nil }
52
+ end
53
+
54
+ class Ipv4Ranges
55
+ include IpRanges
56
+ ip_ranges { ['66.220.144.0/21'] }
57
+ end
58
+
49
59
  class IpRangesTest < Minitest::Test
50
60
  def test_partition_method
51
61
  empty = NoRanges.partition_ips([])
@@ -108,6 +118,14 @@ module Legitbot
108
118
  assert_equal 2, LoadRanges.counter
109
119
  end
110
120
  # rubocop:enable Metrics/AbcSize
121
+
122
+ def test_nil_ranges
123
+ assert NilRanges.valid_ip?('127.0.0.1')
124
+ end
125
+
126
+ def test_ipv4_only_ranges
127
+ refute Ipv4Ranges.valid_ip?('2a03:2880:f234:0:0:0:0:1')
128
+ end
111
129
  end
112
130
  end
113
131
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-19 00:00:00.000000000 Z
11
+ date: 2020-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: '0.74'
120
- - - ">="
119
+ version: '0.90'
120
+ - - "<"
121
121
  - !ruby/object:Gem::Version
122
- version: 0.74.0
122
+ version: '0.91'
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: '0.74'
130
- - - ">="
129
+ version: '0.90'
130
+ - - "<"
131
131
  - !ruby/object:Gem::Version
132
- version: 0.74.0
132
+ version: '0.91'
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -137,9 +137,11 @@ executables: []
137
137
  extensions: []
138
138
  extra_rdoc_files: []
139
139
  files:
140
+ - ".editorconfig"
141
+ - ".github/workflows/build.yml"
140
142
  - ".gitignore"
141
143
  - ".rubocop.yml"
142
- - ".travis.yml"
144
+ - ".ruby-version"
143
145
  - Gemfile
144
146
  - LICENSE.txt
145
147
  - README.md
@@ -147,6 +149,7 @@ files:
147
149
  - legitbot.gemspec
148
150
  - lib/legitbot.rb
149
151
  - lib/legitbot/ahrefs.rb
152
+ - lib/legitbot/alexa.rb
150
153
  - lib/legitbot/apple.rb
151
154
  - lib/legitbot/baidu.rb
152
155
  - lib/legitbot/bing.rb
@@ -164,6 +167,7 @@ files:
164
167
  - lib/legitbot/version.rb
165
168
  - lib/legitbot/yandex.rb
166
169
  - test/ahrefs_test.rb
170
+ - test/alexa_test.rb
167
171
  - test/apple_as_google_test.rb
168
172
  - test/apple_test.rb
169
173
  - test/botmatch_test.rb
@@ -179,7 +183,7 @@ homepage: https://github.com/alaz/legitbot
179
183
  licenses:
180
184
  - Apache-2.0
181
185
  metadata: {}
182
- post_install_message:
186
+ post_install_message:
183
187
  rdoc_options:
184
188
  - "--charset=UTF-8"
185
189
  require_paths:
@@ -188,7 +192,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
188
192
  requirements:
189
193
  - - ">="
190
194
  - !ruby/object:Gem::Version
191
- version: 2.3.0
195
+ version: 2.4.0
192
196
  required_rubygems_version: !ruby/object:Gem::Requirement
193
197
  requirements:
194
198
  - - ">="
@@ -196,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
196
200
  version: '0'
197
201
  requirements: []
198
202
  rubygems_version: 3.1.2
199
- signing_key:
203
+ signing_key:
200
204
  specification_version: 4
201
205
  summary: 'Validate requests from Web crawlers: impersonating or not?'
202
206
  test_files:
@@ -204,6 +208,7 @@ test_files:
204
208
  - test/legitbot/validators/domains_test.rb
205
209
  - test/legitbot/validators/ip_ranges_test.rb
206
210
  - test/pinterest_test.rb
211
+ - test/alexa_test.rb
207
212
  - test/ahrefs_test.rb
208
213
  - test/apple_test.rb
209
214
  - test/apple_as_google_test.rb
@@ -1,12 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - 2.5
6
- - 2.3
7
- - jruby-head
8
- jdk:
9
- - openjdk8
10
- before_install:
11
- - gem update --system
12
- - gem install bundler