legitbot 1.0.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1596ed8c3809fc3c3068f14d8bbbcf84232286fd7157fe724ba9515d195259cd
4
- data.tar.gz: f0078d0404d752550adeeaa9c64954cbcec57fef3d0b37c98bc50d765c29bc1f
3
+ metadata.gz: d709839505242be7ed07c5f6413e607bd8eb127a011765b9f45c1a3e0bba2fbb
4
+ data.tar.gz: 53a19413068473299ce78887756a2ef50f59859e54d652e4314755178a30028c
5
5
  SHA512:
6
- metadata.gz: 5640ae1e351bbd697325cd0bed0bdac45c3726e7b31923b7b5c8f51859ed025d4eb368d937a2b3bf9ef6c150d83418f8e019304dec3bc1b010572a4e1598c661
7
- data.tar.gz: bf326da52d3adf1b2cfa3693b51cc763daef54c9b175cfb91b437b213722a0f578565db553f1240b6d6ee47d159bb418fdcc1db1f95cb14696b2bfb52e50b75d
6
+ metadata.gz: daaf7bbde5fae2ab3937b5e4f4cd936a61f54b80d5ec87eaca8db473977f633c993f267f6aeb101b94404119e58835878ee1433683cf613b476c6fc7f27b34b3
7
+ data.tar.gz: 1e51fe4ee02eb14bb0a95230e2ca3d4716a56cee199a40f3cfcd4d0999158526f151fa7915a9a3f0a21a9054ebc3862353e1a68a89057c2cf8bbcac4d082afb8
data/.editorconfig ADDED
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ end_of_line = lf
5
+ insert_final_newline = true
6
+ trim_trailing_whitespace = true
7
+ charset = utf-8
8
+
9
+ indent_style = space
10
+ indent_size = 2
11
+
12
+ [*.md]
13
+ trim_trailing_whitespace = false
14
+
15
+ [*.yml]
16
+ indent_style = space
17
+ indent_size = 2
data/.github/workflows/build.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  name: build
2
2
 
3
- on: [push]
3
+ on: [pull_request, push]
4
4
 
5
5
  jobs:
6
6
  test:
data/.rubocop.yml CHANGED
@@ -1,2 +1,3 @@
1
1
  AllCops:
2
2
  CacheRootDirectory: 'vendor'
3
+ NewCops: enable
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
1
+ # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) ![](https://badge.fury.io/rb/legitbot.svg)
2
2
 
3
3
  Ruby gem to check that an IP belongs to a bot, typically a search
4
4
  engine. This can be of help in protecting a web site from fake search
@@ -44,6 +44,7 @@ end
44
44
  ## Supported
45
45
 
46
46
  * [Ahrefs](https://ahrefs.com/robot)
47
+ * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
47
48
  * [Applebot](https://support.apple.com/en-us/HT204683)
48
49
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
49
50
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -54,6 +55,7 @@ end
54
55
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
55
56
  * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
56
57
  * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
58
+ * [Petal robots (Huawei search)](http://aspiegel.com/petalbot)
57
59
 
58
60
  ## License
59
61
 
data/legitbot.gemspec CHANGED
@@ -19,8 +19,8 @@ Gem::Specification.new do |spec|
19
19
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
- spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
23
- spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
22
+ spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
+ spec.add_development_dependency 'rubocop', '~> 0.92.0', '>= 0.92.0'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
data/lib/legitbot.rb CHANGED
@@ -4,6 +4,7 @@ require_relative 'legitbot/legitbot'
4
4
  require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
+ require_relative 'legitbot/alexa'
7
8
  require_relative 'legitbot/apple'
8
9
  require_relative 'legitbot/baidu'
9
10
  require_relative 'legitbot/bing'
@@ -14,3 +15,4 @@ require_relative 'legitbot/oracle'
14
15
  require_relative 'legitbot/pinterest'
15
16
  require_relative 'legitbot/twitter'
16
17
  require_relative 'legitbot/yandex'
18
+ require_relative 'legitbot/petalbot'
data/lib/legitbot/alexa.rb ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-
5
+ # https://support.alexa.com/hc/en-us/articles/200462340
6
+ # https://support.alexa.com/hc/en-us/articles/200450194
7
+ class Alexa < BotMatch
8
+ ip_ranges %w[
9
+ 52.86.176.3
10
+ 52.4.48.181
11
+ 52.2.182.169
12
+ 52.86.185.29
13
+ ]
14
+ end
15
+
16
+ rule Legitbot::Alexa, %w[Alexabot ia_archiver]
17
+ end
data/lib/legitbot/apple.rb CHANGED
@@ -8,13 +8,5 @@ module Legitbot # :nodoc:
8
8
  ip_ranges '17.0.0.0/8'
9
9
  end
10
10
 
11
- # https://support.apple.com/en-us/HT204683
12
- # rubocop:disable Naming/ClassAndModuleCamelCase
13
- class Apple_as_Google < BotMatch
14
- ip_ranges '17.0.0.0/8'
15
- end
16
- # rubocop:enable Naming/ClassAndModuleCamelCase
17
-
18
11
  rule Legitbot::Apple, %w[Applebot]
19
- rule Legitbot::Apple_as_Google, %w[Googlebot]
20
12
  end
data/lib/legitbot/legitbot.rb CHANGED
@@ -18,7 +18,7 @@ module Legitbot
18
18
  # otherwise.
19
19
  # :yields: a found bot
20
20
  #
21
- # rubocop:disable Metrics/CyclomaticComplexity
21
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
22
22
  def self.bot(user_agent, ip)
23
23
  bots = @rules
24
24
  .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
@@ -33,7 +33,7 @@ module Legitbot
33
33
  selected
34
34
  end
35
35
  end
36
- # rubocop:enable Metrics/CyclomaticComplexity
36
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
37
37
 
38
38
  def self.rule(clazz, fragments)
39
39
  @rules << { class: clazz, fragments: fragments }
data/lib/legitbot/petalbot.rb ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # http://aspiegel.com/petalbot
5
+ class Petalbot < BotMatch
6
+ domains 'aspiegel.com.'
7
+ end
8
+
9
+ rule Legitbot::Petalbot, %w[PetalBot]
10
+ end
data/lib/legitbot/validators/ip_ranges.rb CHANGED
@@ -40,7 +40,7 @@ module Legitbot
40
40
 
41
41
  obj = IPAddr.new(ip)
42
42
  ranges = valid_ips[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i)
43
- !ranges.empty?
43
+ !ranges.nil? && !ranges.empty?
44
44
  end
45
45
 
46
46
  def valid_ips
data/lib/legitbot/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.0.1'
4
+ VERSION = '1.3.0'
5
5
  end
data/test/alexa_test.rb ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class AlexaTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Alexa.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Alexa IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '52.86.176.3'
15
+ match = Legitbot::Alexa.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Alexa IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Alexa detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Alexa'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
31
+ '52.86.176.3'
32
+ )
33
+ assert bot, msg: 'Alexa detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Alexa'
35
+ end
36
+ end
data/test/facebook_test.rb CHANGED
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
33
33
  assert match.fake?, msg: "#{ip} is a fake Facebook IP"
34
34
  end
35
35
 
36
- # rubocop:disable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
36
+ # rubocop:disable Layout/LineLength, Metrics/MethodLength
37
37
  def test_user_agent
38
38
  Legitbot.bot(
39
39
  'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
@@ -59,5 +59,5 @@ class FacebookTest < Minitest::Test
59
59
  assert bot.fake?, msg: 'fake Facebook'
60
60
  end
61
61
  end
62
- # rubocop:enable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
62
+ # rubocop:enable Layout/LineLength, Metrics/MethodLength
63
63
  end
data/test/legitbot/validators/ip_ranges_test.rb CHANGED
@@ -51,6 +51,11 @@ module Legitbot
51
51
  ip_ranges { nil }
52
52
  end
53
53
 
54
+ class Ipv4Ranges
55
+ include IpRanges
56
+ ip_ranges { ['66.220.144.0/21'] }
57
+ end
58
+
54
59
  class IpRangesTest < Minitest::Test
55
60
  def test_partition_method
56
61
  empty = NoRanges.partition_ips([])
@@ -117,6 +122,10 @@ module Legitbot
117
122
  def test_nil_ranges
118
123
  assert NilRanges.valid_ip?('127.0.0.1')
119
124
  end
125
+
126
+ def test_ipv4_only_ranges
127
+ refute Ipv4Ranges.valid_ip?('2a03:2880:f234:0:0:0:0:1')
128
+ end
120
129
  end
121
130
  end
122
131
  end
data/test/petalbot_test.rb ADDED
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class PetalbotTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Petalbot.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Petalbot IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '114.119.153.50'
15
+ match = Legitbot::Petalbot.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Petalbot detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Petalbot'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
31
+ '114.119.153.50'
32
+ )
33
+ assert bot, msg: 'Petalbot detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Petalbot'
35
+ end
36
+
37
+ def test_valid_name
38
+ bot = Legitbot.bot(
39
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
40
+ '66.249.64.141'
41
+ )
42
+ assert_equal :petalbot, bot.detected_as
43
+ end
44
+
45
+ def test_fake_name
46
+ bot = Legitbot.bot(
47
+ 'Mozilla/5.0 (compatible; PetalBot/2.1; +http://www.google.com/bot.html)',
48
+ '81.1.172.108'
49
+ )
50
+ assert_equal :petalbot, bot.detected_as
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-28 00:00:00.000000000 Z
11
+ date: 2020-12-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -96,40 +96,40 @@ dependencies:
96
96
  requirements:
97
97
  - - "~>"
98
98
  - !ruby/object:Gem::Version
99
- version: '12.3'
99
+ version: '13.0'
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
- version: 12.3.0
102
+ version: 13.0.0
103
103
  type: :development
104
104
  prerelease: false
105
105
  version_requirements: !ruby/object:Gem::Requirement
106
106
  requirements:
107
107
  - - "~>"
108
108
  - !ruby/object:Gem::Version
109
- version: '12.3'
109
+ version: '13.0'
110
110
  - - ">="
111
111
  - !ruby/object:Gem::Version
112
- version: 12.3.0
112
+ version: 13.0.0
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: rubocop
115
115
  requirement: !ruby/object:Gem::Requirement
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: '0.74'
119
+ version: 0.92.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 0.74.0
122
+ version: 0.92.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: '0.74'
129
+ version: 0.92.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 0.74.0
132
+ version: 0.92.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -137,6 +137,7 @@ executables: []
137
137
  extensions: []
138
138
  extra_rdoc_files: []
139
139
  files:
140
+ - ".editorconfig"
140
141
  - ".github/workflows/build.yml"
141
142
  - ".gitignore"
142
143
  - ".rubocop.yml"
@@ -148,6 +149,7 @@ files:
148
149
  - legitbot.gemspec
149
150
  - lib/legitbot.rb
150
151
  - lib/legitbot/ahrefs.rb
152
+ - lib/legitbot/alexa.rb
151
153
  - lib/legitbot/apple.rb
152
154
  - lib/legitbot/baidu.rb
153
155
  - lib/legitbot/bing.rb
@@ -158,6 +160,7 @@ files:
158
160
  - lib/legitbot/google.rb
159
161
  - lib/legitbot/legitbot.rb
160
162
  - lib/legitbot/oracle.rb
163
+ - lib/legitbot/petalbot.rb
161
164
  - lib/legitbot/pinterest.rb
162
165
  - lib/legitbot/twitter.rb
163
166
  - lib/legitbot/validators/domains.rb
@@ -165,7 +168,7 @@ files:
165
168
  - lib/legitbot/version.rb
166
169
  - lib/legitbot/yandex.rb
167
170
  - test/ahrefs_test.rb
168
- - test/apple_as_google_test.rb
171
+ - test/alexa_test.rb
169
172
  - test/apple_test.rb
170
173
  - test/botmatch_test.rb
171
174
  - test/facebook_test.rb
@@ -174,6 +177,7 @@ files:
174
177
  - test/legitbot/validators/ip_ranges_test.rb
175
178
  - test/legitbot_test.rb
176
179
  - test/oracle_test.rb
180
+ - test/petalbot_test.rb
177
181
  - test/pinterest_test.rb
178
182
  - test/twitter_test.rb
179
183
  homepage: https://github.com/alaz/legitbot
@@ -196,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
196
200
  - !ruby/object:Gem::Version
197
201
  version: '0'
198
202
  requirements: []
199
- rubygems_version: 3.1.2
203
+ rubygems_version: 3.1.4
200
204
  signing_key:
201
205
  specification_version: 4
202
206
  summary: 'Validate requests from Web crawlers: impersonating or not?'
@@ -205,11 +209,12 @@ test_files:
205
209
  - test/legitbot/validators/domains_test.rb
206
210
  - test/legitbot/validators/ip_ranges_test.rb
207
211
  - test/pinterest_test.rb
212
+ - test/alexa_test.rb
208
213
  - test/ahrefs_test.rb
209
214
  - test/apple_test.rb
210
- - test/apple_as_google_test.rb
211
215
  - test/oracle_test.rb
212
216
  - test/google_test.rb
217
+ - test/petalbot_test.rb
213
218
  - test/botmatch_test.rb
214
219
  - test/facebook_test.rb
215
220
  - test/twitter_test.rb
data/test/apple_as_google_test.rb DELETED
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'minitest/autorun'
4
- require 'legitbot'
5
-
6
- class AppleAsGoogleTest < Minitest::Test
7
- def test_valid_ip
8
- ip = '17.58.98.60'
9
- match = Legitbot::Apple_as_Google.new(ip)
10
- assert match.valid?, msg: "#{ip} is a valid Applebot IP"
11
- end
12
-
13
- def test_invalid_ip
14
- ip = '127.0.0.1'
15
- match = Legitbot::Apple_as_Google.new(ip)
16
- assert match.fake?, msg: "#{ip} is a fake Applebot IP"
17
- end
18
-
19
- def test_user_agent
20
- bot = Legitbot.bot(
21
- 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
22
- '17.58.98.60'
23
- )
24
- assert_equal :apple_as_google, bot.detected_as
25
- assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
26
- end
27
- end