legitbot 1.0.1 → 1.0.2

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1596ed8c3809fc3c3068f14d8bbbcf84232286fd7157fe724ba9515d195259cd
4
- data.tar.gz: f0078d0404d752550adeeaa9c64954cbcec57fef3d0b37c98bc50d765c29bc1f
3
+ metadata.gz: c0b43dcf4386a8427415729bf9ba5edfe3c0efc24a002bb4301ff98d43c4ee0d
4
+ data.tar.gz: 9f18f03e613473a97cda23e8663d44da10cfd32b3fd271e1ffd5ee20cfdef48d
5
5
  SHA512:
6
- metadata.gz: 5640ae1e351bbd697325cd0bed0bdac45c3726e7b31923b7b5c8f51859ed025d4eb368d937a2b3bf9ef6c150d83418f8e019304dec3bc1b010572a4e1598c661
7
- data.tar.gz: bf326da52d3adf1b2cfa3693b51cc763daef54c9b175cfb91b437b213722a0f578565db553f1240b6d6ee47d159bb418fdcc1db1f95cb14696b2bfb52e50b75d
6
+ metadata.gz: 73ad2e000c4513880bc5558c7fe5f7c7c04d01b1a820ef7b12541c8c9d6cc5dddd20c785d67ef489d5b00664632c6b88a805c3bb1e2a060517126c7d7292d661
7
+ data.tar.gz: a578a30a512495523e906ec1d6a650f4f472a58856caa542714d3e57a0b99f764ee015f11624c4a20cae79a3975eb41eaf8ced106977dd994981d71c288b5653
@@ -1,6 +1,6 @@
1
1
  name: build
2
2
 
3
- on: [push]
3
+ on: [pull_request, push]
4
4
 
5
5
  jobs:
6
6
  test:
@@ -1,2 +1,3 @@
1
1
  AllCops:
2
2
  CacheRootDirectory: 'vendor'
3
+ NewCops: enable
data/README.md CHANGED
@@ -44,6 +44,7 @@ end
44
44
  ## Supported
45
45
 
46
46
  * [Ahrefs](https://ahrefs.com/robot)
47
+ * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
47
48
  * [Applebot](https://support.apple.com/en-us/HT204683)
48
49
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
49
50
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
22
  spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
23
- spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
23
+ spec.add_development_dependency 'rubocop', '~> 0.90', '< 0.91'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
@@ -4,6 +4,7 @@ require_relative 'legitbot/legitbot'
4
4
  require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
+ require_relative 'legitbot/alexa'
7
8
  require_relative 'legitbot/apple'
8
9
  require_relative 'legitbot/baidu'
9
10
  require_relative 'legitbot/bing'
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-
5
+ # https://support.alexa.com/hc/en-us/articles/200462340
6
+ # https://support.alexa.com/hc/en-us/articles/200450194
7
+ class Alexa < BotMatch
8
+ ip_ranges %w[
9
+ 52.86.176.3
10
+ 52.4.48.181
11
+ 52.2.182.169
12
+ 52.86.185.29
13
+ ]
14
+ end
15
+
16
+ rule Legitbot::Alexa, %w[Alexabot ia_archiver]
17
+ end
@@ -18,7 +18,7 @@ module Legitbot
18
18
  # otherwise.
19
19
  # :yields: a found bot
20
20
  #
21
- # rubocop:disable Metrics/CyclomaticComplexity
21
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
22
22
  def self.bot(user_agent, ip)
23
23
  bots = @rules
24
24
  .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
@@ -33,7 +33,7 @@ module Legitbot
33
33
  selected
34
34
  end
35
35
  end
36
- # rubocop:enable Metrics/CyclomaticComplexity
36
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
37
37
 
38
38
  def self.rule(clazz, fragments)
39
39
  @rules << { class: clazz, fragments: fragments }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.0.1'
4
+ VERSION = '1.0.2'
5
5
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class AlexaTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Alexa.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Alexa IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '52.86.176.3'
15
+ match = Legitbot::Alexa.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Alexa IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Alexa detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Alexa'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
31
+ '52.86.176.3'
32
+ )
33
+ assert bot, msg: 'Alexa detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Alexa'
35
+ end
36
+ end
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
33
33
  assert match.fake?, msg: "#{ip} is a fake Facebook IP"
34
34
  end
35
35
 
36
- # rubocop:disable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
36
+ # rubocop:disable Layout/LineLength, Metrics/MethodLength
37
37
  def test_user_agent
38
38
  Legitbot.bot(
39
39
  'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
@@ -59,5 +59,5 @@ class FacebookTest < Minitest::Test
59
59
  assert bot.fake?, msg: 'fake Facebook'
60
60
  end
61
61
  end
62
- # rubocop:enable Metrics/AbcSize, Layout/LineLength, Metrics/MethodLength
62
+ # rubocop:enable Layout/LineLength, Metrics/MethodLength
63
63
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-28 00:00:00.000000000 Z
11
+ date: 2020-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: '0.74'
120
- - - ">="
119
+ version: '0.90'
120
+ - - "<"
121
121
  - !ruby/object:Gem::Version
122
- version: 0.74.0
122
+ version: '0.91'
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: '0.74'
130
- - - ">="
129
+ version: '0.90'
130
+ - - "<"
131
131
  - !ruby/object:Gem::Version
132
- version: 0.74.0
132
+ version: '0.91'
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -148,6 +148,7 @@ files:
148
148
  - legitbot.gemspec
149
149
  - lib/legitbot.rb
150
150
  - lib/legitbot/ahrefs.rb
151
+ - lib/legitbot/alexa.rb
151
152
  - lib/legitbot/apple.rb
152
153
  - lib/legitbot/baidu.rb
153
154
  - lib/legitbot/bing.rb
@@ -165,6 +166,7 @@ files:
165
166
  - lib/legitbot/version.rb
166
167
  - lib/legitbot/yandex.rb
167
168
  - test/ahrefs_test.rb
169
+ - test/alexa_test.rb
168
170
  - test/apple_as_google_test.rb
169
171
  - test/apple_test.rb
170
172
  - test/botmatch_test.rb
@@ -205,6 +207,7 @@ test_files:
205
207
  - test/legitbot/validators/domains_test.rb
206
208
  - test/legitbot/validators/ip_ranges_test.rb
207
209
  - test/pinterest_test.rb
210
+ - test/alexa_test.rb
208
211
  - test/ahrefs_test.rb
209
212
  - test/apple_test.rb
210
213
  - test/apple_as_google_test.rb