legitbot 1.0.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1596ed8c3809fc3c3068f14d8bbbcf84232286fd7157fe724ba9515d195259cd
4
- data.tar.gz: f0078d0404d752550adeeaa9c64954cbcec57fef3d0b37c98bc50d765c29bc1f
3
+ metadata.gz: d709839505242be7ed07c5f6413e607bd8eb127a011765b9f45c1a3e0bba2fbb
4
+ data.tar.gz: 53a19413068473299ce78887756a2ef50f59859e54d652e4314755178a30028c
5
5
  SHA512:
6
- metadata.gz: 5640ae1e351bbd697325cd0bed0bdac45c3726e7b31923b7b5c8f51859ed025d4eb368d937a2b3bf9ef6c150d83418f8e019304dec3bc1b010572a4e1598c661
7
- data.tar.gz: bf326da52d3adf1b2cfa3693b51cc763daef54c9b175cfb91b437b213722a0f578565db553f1240b6d6ee47d159bb418fdcc1db1f95cb14696b2bfb52e50b75d
6
+ metadata.gz: daaf7bbde5fae2ab3937b5e4f4cd936a61f54b80d5ec87eaca8db473977f633c993f267f6aeb101b94404119e58835878ee1433683cf613b476c6fc7f27b34b3
7
+ data.tar.gz: 1e51fe4ee02eb14bb0a95230e2ca3d4716a56cee199a40f3cfcd4d0999158526f151fa7915a9a3f0a21a9054ebc3862353e1a68a89057c2cf8bbcac4d082afb8
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ end_of_line = lf
5
+ insert_final_newline = true
6
+ trim_trailing_whitespace = true
7
+ charset = utf-8
8
+
9
+ indent_style = space
10
+ indent_size = 2
11
+
12
+ [*.md]
13
+ trim_trailing_whitespace = false
14
+
15
+ [*.yml]
16
+ indent_style = space
17
+ indent_size = 2
@@ -1,6 +1,6 @@
1
1
  name: build
2
2
 
3
- on: [push]
3
+ on: [pull_request, push]
4
4
 
5
5
  jobs:
6
6
  test:
@@ -1,2 +1,3 @@
1
1
  AllCops:
2
2
  CacheRootDirectory: 'vendor'
3
+ NewCops: enable
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
1
+ # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) ![](https://badge.fury.io/rb/legitbot.svg)
2
2
 
3
3
  Ruby gem to check that an IP belongs to a bot, typically a search
4
4
  engine. This can be of help in protecting a web site from fake search
@@ -44,6 +44,7 @@ end
44
44
  ## Supported
45
45
 
46
46
  * [Ahrefs](https://ahrefs.com/robot)
47
+ * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
47
48
  * [Applebot](https://support.apple.com/en-us/HT204683)
48
49
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
49
50
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -54,6 +55,7 @@ end
54
55
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
55
56
  * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
56
57
  * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
58
+ * [Petal robots (Huawei search)](http://aspiegel.com/petalbot)
57
59
 
58
60
  ## License
59
61
 
@@ -19,8 +19,8 @@ Gem::Specification.new do |spec|
19
19
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
- spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
23
- spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
22
+ spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
+ spec.add_development_dependency 'rubocop', '~> 0.92.0', '>= 0.92.0'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
@@ -4,6 +4,7 @@ require_relative 'legitbot/legitbot'
4
4
  require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
+ require_relative 'legitbot/alexa'
7
8
  require_relative 'legitbot/apple'
8
9
  require_relative 'legitbot/baidu'
9
10
  require_relative 'legitbot/bing'
@@ -14,3 +15,4 @@ require_relative 'legitbot/oracle'
14
15
  require_relative 'legitbot/pinterest'
15
16
  require_relative 'legitbot/twitter'
16
17
  require_relative 'legitbot/yandex'
18
+ require_relative 'legitbot/petalbot'
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-
5
+ # https://support.alexa.com/hc/en-us/articles/200462340
6
+ # https://support.alexa.com/hc/en-us/articles/200450194
7
+ class Alexa < BotMatch
8
+ ip_ranges %w[
9
+ 52.86.176.3
10
+ 52.4.48.181
11
+ 52.2.182.169
12
+ 52.86.185.29
13
+ ]
14
+ end
15
+
16
+ rule Legitbot::Alexa, %w[Alexabot ia_archiver]
17
+ end
@@ -8,13 +8,5 @@ module Legitbot # :nodoc:
8
8
  ip_ranges '17.0.0.0/8'
9
9
  end
10
10
 
11
- # https://support.apple.com/en-us/HT204683
12
- # rubocop:disable Naming/ClassAndModuleCamelCase
13
- class Apple_as_Google < BotMatch
14
- ip_ranges '17.0.0.0/8'
15
- end
16
- # rubocop:enable Naming/ClassAndModuleCamelCase
17
-
18
11
  rule Legitbot::Apple, %w[Applebot]
19
- rule Legitbot::Apple_as_Google, %w[Googlebot]
20
12
  end
@@ -18,7 +18,7 @@ module Legitbot
18
18
  # otherwise.
19
19
  # :yields: a found bot
20
20
  #
21
- # rubocop:disable Metrics/CyclomaticComplexity
21
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
22
22
  def self.bot(user_agent, ip)
23
23
  bots = @rules
24
24
  .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
@@ -33,7 +33,7 @@ module Legitbot
33
33
  selected
34
34
  end
35
35
  end
36
- # rubocop:enable Metrics/CyclomaticComplexity
36
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
37
37
 
38
38
  def self.rule(clazz, fragments)
39
39
  @rules << { class: clazz, fragments: fragments }
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # http://aspiegel.com/petalbot
5
+ class Petalbot < BotMatch
6
+ domains 'aspiegel.com.'
7
+ end
8
+
9
+ rule Legitbot::Petalbot, %w[PetalBot]
10
+ end
@@ -40,7 +40,7 @@ module Legitbot
40
40
 
41
41
  obj = IPAddr.new(ip)
42
42
  ranges = valid_ips[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i)
43
- !ranges.empty?
43
+ !ranges.nil? && !ranges.empty?
44
44
  end
45
45
 
46
46
  def valid_ips
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.0.1'
4
+ VERSION = '1.3.0'
5
5
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class AlexaTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Alexa.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Alexa IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '52.86.176.3'
15
+ match = Legitbot::Alexa.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Alexa IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Alexa detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Alexa'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
31
+ '52.86.176.3'
32
+ )
33
+ assert bot, msg: 'Alexa detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Alexa'
35
+ end
36
+ end
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
33
33
  assert match.fake?, msg: "#{ip} is a fake Facebook IP"
34
34
  end
35
35
 
36
- # rubocop:disable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
36
+ # rubocop:disable Layout/LineLength, Metrics/MethodLength
37
37
  def test_user_agent
38
38
  Legitbot.bot(
39
39
  'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
@@ -59,5 +59,5 @@ class FacebookTest < Minitest::Test
59
59
  assert bot.fake?, msg: 'fake Facebook'
60
60
  end
61
61
  end
62
- # rubocop:enable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
62
+ # rubocop:enable Layout/LineLength, Metrics/MethodLength
63
63
  end
@@ -51,6 +51,11 @@ module Legitbot
51
51
  ip_ranges { nil }
52
52
  end
53
53
 
54
+ class Ipv4Ranges
55
+ include IpRanges
56
+ ip_ranges { ['66.220.144.0/21'] }
57
+ end
58
+
54
59
  class IpRangesTest < Minitest::Test
55
60
  def test_partition_method
56
61
  empty = NoRanges.partition_ips([])
@@ -117,6 +122,10 @@ module Legitbot
117
122
  def test_nil_ranges
118
123
  assert NilRanges.valid_ip?('127.0.0.1')
119
124
  end
125
+
126
+ def test_ipv4_only_ranges
127
+ refute Ipv4Ranges.valid_ip?('2a03:2880:f234:0:0:0:0:1')
128
+ end
120
129
  end
121
130
  end
122
131
  end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class PetalbotTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Petalbot.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Petalbot IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '114.119.153.50'
15
+ match = Legitbot::Petalbot.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Petalbot detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Petalbot'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
31
+ '114.119.153.50'
32
+ )
33
+ assert bot, msg: 'Petalbot detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Petalbot'
35
+ end
36
+
37
+ def test_valid_name
38
+ bot = Legitbot.bot(
39
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
40
+ '66.249.64.141'
41
+ )
42
+ assert_equal :petalbot, bot.detected_as
43
+ end
44
+
45
+ def test_fake_name
46
+ bot = Legitbot.bot(
47
+ 'Mozilla/5.0 (compatible; PetalBot/2.1; +http://www.google.com/bot.html)',
48
+ '81.1.172.108'
49
+ )
50
+ assert_equal :petalbot, bot.detected_as
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-28 00:00:00.000000000 Z
11
+ date: 2020-12-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -96,40 +96,40 @@ dependencies:
96
96
  requirements:
97
97
  - - "~>"
98
98
  - !ruby/object:Gem::Version
99
- version: '12.3'
99
+ version: '13.0'
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
- version: 12.3.0
102
+ version: 13.0.0
103
103
  type: :development
104
104
  prerelease: false
105
105
  version_requirements: !ruby/object:Gem::Requirement
106
106
  requirements:
107
107
  - - "~>"
108
108
  - !ruby/object:Gem::Version
109
- version: '12.3'
109
+ version: '13.0'
110
110
  - - ">="
111
111
  - !ruby/object:Gem::Version
112
- version: 12.3.0
112
+ version: 13.0.0
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: rubocop
115
115
  requirement: !ruby/object:Gem::Requirement
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: '0.74'
119
+ version: 0.92.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 0.74.0
122
+ version: 0.92.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: '0.74'
129
+ version: 0.92.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 0.74.0
132
+ version: 0.92.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -137,6 +137,7 @@ executables: []
137
137
  extensions: []
138
138
  extra_rdoc_files: []
139
139
  files:
140
+ - ".editorconfig"
140
141
  - ".github/workflows/build.yml"
141
142
  - ".gitignore"
142
143
  - ".rubocop.yml"
@@ -148,6 +149,7 @@ files:
148
149
  - legitbot.gemspec
149
150
  - lib/legitbot.rb
150
151
  - lib/legitbot/ahrefs.rb
152
+ - lib/legitbot/alexa.rb
151
153
  - lib/legitbot/apple.rb
152
154
  - lib/legitbot/baidu.rb
153
155
  - lib/legitbot/bing.rb
@@ -158,6 +160,7 @@ files:
158
160
  - lib/legitbot/google.rb
159
161
  - lib/legitbot/legitbot.rb
160
162
  - lib/legitbot/oracle.rb
163
+ - lib/legitbot/petalbot.rb
161
164
  - lib/legitbot/pinterest.rb
162
165
  - lib/legitbot/twitter.rb
163
166
  - lib/legitbot/validators/domains.rb
@@ -165,7 +168,7 @@ files:
165
168
  - lib/legitbot/version.rb
166
169
  - lib/legitbot/yandex.rb
167
170
  - test/ahrefs_test.rb
168
- - test/apple_as_google_test.rb
171
+ - test/alexa_test.rb
169
172
  - test/apple_test.rb
170
173
  - test/botmatch_test.rb
171
174
  - test/facebook_test.rb
@@ -174,6 +177,7 @@ files:
174
177
  - test/legitbot/validators/ip_ranges_test.rb
175
178
  - test/legitbot_test.rb
176
179
  - test/oracle_test.rb
180
+ - test/petalbot_test.rb
177
181
  - test/pinterest_test.rb
178
182
  - test/twitter_test.rb
179
183
  homepage: https://github.com/alaz/legitbot
@@ -196,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
196
200
  - !ruby/object:Gem::Version
197
201
  version: '0'
198
202
  requirements: []
199
- rubygems_version: 3.1.2
203
+ rubygems_version: 3.1.4
200
204
  signing_key:
201
205
  specification_version: 4
202
206
  summary: 'Validate requests from Web crawlers: impersonating or not?'
@@ -205,11 +209,12 @@ test_files:
205
209
  - test/legitbot/validators/domains_test.rb
206
210
  - test/legitbot/validators/ip_ranges_test.rb
207
211
  - test/pinterest_test.rb
212
+ - test/alexa_test.rb
208
213
  - test/ahrefs_test.rb
209
214
  - test/apple_test.rb
210
- - test/apple_as_google_test.rb
211
215
  - test/oracle_test.rb
212
216
  - test/google_test.rb
217
+ - test/petalbot_test.rb
213
218
  - test/botmatch_test.rb
214
219
  - test/facebook_test.rb
215
220
  - test/twitter_test.rb
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'minitest/autorun'
4
- require 'legitbot'
5
-
6
- class AppleAsGoogleTest < Minitest::Test
7
- def test_valid_ip
8
- ip = '17.58.98.60'
9
- match = Legitbot::Apple_as_Google.new(ip)
10
- assert match.valid?, msg: "#{ip} is a valid Applebot IP"
11
- end
12
-
13
- def test_invalid_ip
14
- ip = '127.0.0.1'
15
- match = Legitbot::Apple_as_Google.new(ip)
16
- assert match.fake?, msg: "#{ip} is a fake Applebot IP"
17
- end
18
-
19
- def test_user_agent
20
- bot = Legitbot.bot(
21
- 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
22
- '17.58.98.60'
23
- )
24
- assert_equal :apple_as_google, bot.detected_as
25
- assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
26
- end
27
- end