legitbot 1.10.2 → 1.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 65ae4bb4d07e0b89472f7d5ddb14886d2f9f20248a7bfb8394040ae4504795d5
4
- data.tar.gz: 2fcd4b2fce6c14f3a2eb9ac15a145059fe9455ec46a36685e15dafb60aa4bdac
3
+ metadata.gz: a6f6866d656dda624b8222126873aeb7276886344a3bedcb11b6aeb18cd10e64
4
+ data.tar.gz: fcc350098a93804f5dfddcf38d945ad1a75c8dced01405cf4a59a70d570f4196
5
5
  SHA512:
6
- metadata.gz: 7259955b97e5289b225722055f725db8ebde11fa67a3f4286d828219788281d1ab96ee854199fa9fb0a84f2f41c29a9f538371dd7dfe851477f65cc665a33376
7
- data.tar.gz: e0f07a76b089ecadef39df7d7bb3c490f3b2f8fb581410eeb956118ace595d15f0829331447c9167e8426ca92f99682242dd7c0fb128aa82efc2081af7c0820e
6
+ metadata.gz: 998bca74d492853877cd0775aff8bdea57d94521dd91f829289689e35717a190c94250a490a9033c1edfade19bf5caaeeb15771575c46a9096e811b08e6473a4
7
+ data.tar.gz: e491ef1b1e8aa07783989ef2992d7d484c62685ba3ea5147c4b2256880939a644d489a0defa3c59a7b0a5b8cb7ff0b82d4cc5a786ed294d647c19ff40c35b2e0
@@ -2,7 +2,7 @@ name: autocorrect
2
2
 
3
3
  on:
4
4
  schedule:
5
- - cron: '29 6 * * 6'
5
+ - cron: "29 6 * * 6"
6
6
 
7
7
  workflow_dispatch:
8
8
 
@@ -12,36 +12,36 @@ jobs:
12
12
 
13
13
  strategy:
14
14
  matrix:
15
- ruby: [ 3.0 ]
15
+ ruby: [3.1]
16
16
 
17
17
  steps:
18
- - uses: actions/checkout@v3
19
- - name: Set up Ruby
20
- uses: ruby/setup-ruby@v1
21
- with:
22
- ruby-version: ${{ matrix.ruby }}
23
- - name: Cache dependencies
24
- uses: actions/cache@v3
25
- with:
26
- path: vendor/bundle
27
- key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
28
- restore-keys: |
29
- ${{ runner.os }}-${{ matrix.ruby }}-gems-
30
- - name: Install dependencies
31
- run: |
32
- bundle config path vendor/bundle
33
- bundle install --jobs 4 --retry 3
34
- - name: Run tests
35
- run: bundle exec rake test
36
- - name: Run linter
37
- run: bundle exec rubocop -A
38
- continue-on-error: true
39
- - name: Create Pull Request
40
- uses: peter-evans/create-pull-request@v5
41
- with:
42
- branch: update/lint-autocorrect
43
- delete-branch: true
44
- commit-message: '🔧 autocorrect by Rubocop'
45
- title: '[rubocop] Automatic corrections'
46
- body: |
47
- Please review automatic changes proposed by `rubocop -a`.
18
+ - uses: actions/checkout@v3
19
+ - name: Set up Ruby
20
+ uses: ruby/setup-ruby@v1
21
+ with:
22
+ ruby-version: ${{ matrix.ruby }}
23
+ - name: Cache dependencies
24
+ uses: actions/cache@v3
25
+ with:
26
+ path: vendor/bundle
27
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
28
+ restore-keys: |
29
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
30
+ - name: Install dependencies
31
+ run: |
32
+ bundle config path vendor/bundle
33
+ bundle install --jobs 4 --retry 3
34
+ - name: Run tests
35
+ run: bundle exec rake test
36
+ - name: Run linter
37
+ run: bundle exec rubocop -A
38
+ continue-on-error: true
39
+ - name: Create Pull Request
40
+ uses: peter-evans/create-pull-request@v5
41
+ with:
42
+ branch: update/lint-autocorrect
43
+ delete-branch: true
44
+ commit-message: "🔧 autocorrect by Rubocop"
45
+ title: "[rubocop] Automatic corrections"
46
+ body: |
47
+ Please review automatic changes proposed by `rubocop -a`.
@@ -0,0 +1,41 @@
1
+ name: autocorrect
2
+
3
+ on:
4
+ pull_request:
5
+
6
+ push:
7
+ branches:
8
+ - "master"
9
+
10
+ jobs:
11
+ codecov:
12
+ runs-on: ubuntu-latest
13
+
14
+ strategy:
15
+ matrix:
16
+ ruby: [3.1]
17
+
18
+ steps:
19
+ - uses: actions/checkout@v3
20
+ - name: Set up Ruby
21
+ uses: ruby/setup-ruby@v1
22
+ with:
23
+ ruby-version: ${{ matrix.ruby }}
24
+ - name: Cache dependencies
25
+ uses: actions/cache@v3
26
+ with:
27
+ path: vendor/bundle
28
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
29
+ restore-keys: |
30
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
31
+ - name: Install dependencies
32
+ run: |
33
+ bundle config path vendor/bundle
34
+ bundle install --jobs 4 --retry 3
35
+ - name: Run tests
36
+ run: bundle exec rake test
37
+ - name: Upload coverage
38
+ uses: codecov/codecov-action@v4
39
+ with:
40
+ files: coverage/coverage.xml
41
+ token: ${{ secrets.CODECOV_TOKEN }}
@@ -0,0 +1,61 @@
1
+ name: build
2
+
3
+ on:
4
+ pull_request:
5
+
6
+ jobs:
7
+ test:
8
+ runs-on: ubuntu-latest
9
+
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ ruby: [jruby, 3.0.6, 3.1, 3.2]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v3
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ - name: Cache dependencies
22
+ uses: actions/cache@v3
23
+ with:
24
+ path: vendor/bundle
25
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
26
+ restore-keys: |
27
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
28
+ - name: Install dependencies
29
+ run: |
30
+ bundle config path vendor/bundle
31
+ bundle install --jobs 4 --retry 3
32
+ - name: Run tests
33
+ run: bundle exec rake test
34
+
35
+ lint:
36
+ needs: test
37
+ runs-on: ubuntu-latest
38
+
39
+ strategy:
40
+ matrix:
41
+ ruby: [3.1]
42
+
43
+ steps:
44
+ - uses: actions/checkout@v3
45
+ - name: Set up Ruby
46
+ uses: ruby/setup-ruby@v1
47
+ with:
48
+ ruby-version: ${{ matrix.ruby }}
49
+ - name: Cache dependencies
50
+ uses: actions/cache@v3
51
+ with:
52
+ path: vendor/bundle
53
+ key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
54
+ restore-keys: |
55
+ ${{ runner.os }}-${{ matrix.ruby }}-gems-
56
+ - name: Install dependencies
57
+ run: |
58
+ bundle config path vendor/bundle
59
+ bundle install --jobs 4 --retry 3
60
+ - name: Run linter
61
+ run: bundle exec rubocop
data/.gitignore CHANGED
@@ -2,6 +2,7 @@ Gemfile.lock
2
2
  .bundle
3
3
  *.gem
4
4
  *.gemfile.lock
5
+ /.vscode
5
6
  /coverage
6
7
  /pkg
7
8
  /tags
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.6
1
+ 3.1.6
data/README.md CHANGED
@@ -50,7 +50,6 @@ end
50
50
  ## Supported
51
51
 
52
52
  - [Ahrefs](https://ahrefs.com/robot)
53
- - [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
54
53
  - [Amazon AdBot](https://adbot.amazon.com/index.html)
55
54
  - [Applebot](https://support.apple.com/en-us/HT204683)
56
55
  - [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
data/legitbot.gemspec CHANGED
@@ -13,13 +13,13 @@ Gem::Specification.new do |spec|
13
13
  spec.author = 'Alexander Azarov'
14
14
  spec.email = 'self@alaz.me'
15
15
  spec.homepage = 'https://github.com/alaz/legitbot'
16
- spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
17
- spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
16
+ spec.summary = 'Web crawler request validation'
17
+ spec.description = 'Is this Web request from a real search engine or from an impersonating agent?'
18
18
  spec.metadata = {
19
19
  'rubygems_mfa_required' => 'true'
20
20
  }
21
21
 
22
- spec.required_ruby_version = '>= 2.6.0'
22
+ spec.required_ruby_version = '>= 3.0.0'
23
23
  spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
24
24
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
25
25
 
@@ -3,14 +3,94 @@
3
3
  module Legitbot # :nodoc:
4
4
  # https://ahrefs.com/robot
5
5
  class Ahrefs < BotMatch
6
+ # @fetch:url https://api.ahrefs.com/v3/public/crawler-ip-ranges?output=json
7
+ # @fetch:jsonpath $.prefixes[*].ipv4Prefix
6
8
  ip_ranges %w[
7
- 54.36.148.0/24
8
- 54.36.149.0/24
9
- 54.36.150.0/24
10
- 195.154.122.0/24
11
- 195.154.123.0/24
12
- 195.154.126.0/24
13
- 195.154.127.0/24
9
+ 5.39.1.224/27
10
+ 5.39.109.160/27
11
+ 15.235.27.0/24
12
+ 15.235.96.0/24
13
+ 15.235.98.0/24
14
+ 37.59.204.128/27
15
+ 51.68.247.192/27
16
+ 51.75.236.128/27
17
+ 51.89.129.0/24
18
+ 51.161.37.0/24
19
+ 51.161.65.0/24
20
+ 51.195.183.0/24
21
+ 51.195.215.0/24
22
+ 51.195.244.0/24
23
+ 51.222.95.0/24
24
+ 51.222.168.0/24
25
+ 51.222.253.0/26
26
+ 54.36.148.0/23
27
+ 54.37.118.64/27
28
+ 54.38.147.0/24
29
+ 54.39.0.0/24
30
+ 54.39.6.0/24
31
+ 54.39.89.0/24
32
+ 54.39.136.0/24
33
+ 54.39.203.0/24
34
+ 54.39.210.0/24
35
+ 92.222.104.192/27
36
+ 92.222.108.96/27
37
+ 94.23.188.192/27
38
+ 142.44.220.0/24
39
+ 142.44.225.0/24
40
+ 142.44.228.0/24
41
+ 142.44.233.0/24
42
+ 148.113.128.0/24
43
+ 148.113.130.0/24
44
+ 167.114.139.0/24
45
+ 168.119.64.245/32
46
+ 168.119.64.246/31
47
+ 168.119.64.248/30
48
+ 168.119.64.252/31
49
+ 168.119.64.254/32
50
+ 168.119.65.43/32
51
+ 168.119.65.44/30
52
+ 168.119.65.48/29
53
+ 168.119.65.56/30
54
+ 168.119.65.60/31
55
+ 168.119.65.62/32
56
+ 168.119.65.107/32
57
+ 168.119.65.108/30
58
+ 168.119.65.112/29
59
+ 168.119.65.120/30
60
+ 168.119.65.124/31
61
+ 168.119.65.126/32
62
+ 168.119.68.117/32
63
+ 168.119.68.118/31
64
+ 168.119.68.120/30
65
+ 168.119.68.124/31
66
+ 168.119.68.126/32
67
+ 168.119.68.171/32
68
+ 168.119.68.172/30
69
+ 168.119.68.176/29
70
+ 168.119.68.184/30
71
+ 168.119.68.188/31
72
+ 168.119.68.190/32
73
+ 168.119.68.235/32
74
+ 168.119.68.236/30
75
+ 168.119.68.240/29
76
+ 168.119.68.248/30
77
+ 168.119.68.252/31
78
+ 168.119.68.254/32
79
+ 176.31.139.0/27
80
+ 195.154.122.0/23
81
+ 195.154.126.0/23
82
+ 198.244.168.0/24
83
+ 198.244.183.0/24
84
+ 198.244.186.193/32
85
+ 198.244.186.194/31
86
+ 198.244.186.196/30
87
+ 198.244.186.200/31
88
+ 198.244.186.202/32
89
+ 198.244.226.0/24
90
+ 198.244.240.0/24
91
+ 198.244.242.0/24
92
+ 202.94.84.110/31
93
+ 202.94.84.112/31
14
94
  ]
15
95
  end
16
96
 
@@ -114,6 +114,7 @@ module Legitbot # :nodoc:
114
114
  40.119.232.215
115
115
  40.119.232.218
116
116
  40.119.232.251
117
+ 51.8.253.152
117
118
  51.104.146.225
118
119
  51.104.146.235
119
120
  51.104.160.167
@@ -221,6 +222,7 @@ module Legitbot # :nodoc:
221
222
  52.224.21.55
222
223
  52.224.21.61
223
224
  52.242.224.168
225
+ 57.152.72.128
224
226
  104.43.54.127
225
227
  104.43.55.116
226
228
  104.43.55.117
@@ -3,7 +3,8 @@
3
3
  module Legitbot # :nodoc:
4
4
  # https://platform.openai.com/docs/gptbot
5
5
  class GPTBot < BotMatch
6
- # @fetch:url https://openai.com/gptbot-ranges.txt
6
+ # NOTE: fetching has been disabled, see #131
7
+ # @ fetch:url https://openai.com/gptbot-ranges.txt
7
8
  ip_ranges %w[
8
9
  52.230.152.0/24
9
10
  52.233.106.0/24
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.10.2'
4
+ VERSION = '1.10.4'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -4,7 +4,6 @@ require_relative 'legitbot/legitbot'
4
4
  require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
- require_relative 'legitbot/alexa'
8
7
  require_relative 'legitbot/amazon'
9
8
  require_relative 'legitbot/apple'
10
9
  require_relative 'legitbot/baidu'
@@ -14,7 +14,7 @@ module RuboCop
14
14
  extend AutoCorrector
15
15
 
16
16
  MSG = 'Outdated list of IP ranges compared to %<url>s'
17
- REGEXP = /^\s*#\s*@fetch:(?<param>[a-z0-9_]+)\s+(?<arg>.*)?/.freeze
17
+ REGEXP = /^\s*#\s*@fetch:(?<param>[a-z0-9_]+)\s+(?<arg>.*)?/
18
18
 
19
19
  def_node_matcher :on_ip_ranges, <<~PATTERN
20
20
  (send nil? :ip_ranges $(array str+))
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.2
4
+ version: 1.10.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-14 00:00:00.000000000 Z
11
+ date: 2024-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -50,7 +50,7 @@ dependencies:
50
50
  - - ">="
51
51
  - !ruby/object:Gem::Version
52
52
  version: 0.2.1
53
- description: Does Web request come from a real search engine or from an impersonating
53
+ description: Is this Web request from a real search engine or from an impersonating
54
54
  agent?
55
55
  email: self@alaz.me
56
56
  executables: []
@@ -59,7 +59,8 @@ extra_rdoc_files: []
59
59
  files:
60
60
  - ".editorconfig"
61
61
  - ".github/workflows/autocorrect.yml"
62
- - ".github/workflows/build.yml"
62
+ - ".github/workflows/codecov.yml"
63
+ - ".github/workflows/pr.yml"
63
64
  - ".gitignore"
64
65
  - ".rubocop.yml"
65
66
  - ".ruby-version"
@@ -71,7 +72,6 @@ files:
71
72
  - legitbot.gemspec
72
73
  - lib/legitbot.rb
73
74
  - lib/legitbot/ahrefs.rb
74
- - lib/legitbot/alexa.rb
75
75
  - lib/legitbot/amazon.rb
76
76
  - lib/legitbot/apple.rb
77
77
  - lib/legitbot/baidu.rb
@@ -101,7 +101,6 @@ files:
101
101
  - rakelib/console.rake
102
102
  - rakelib/test.rake
103
103
  - test/ahrefs_test.rb
104
- - test/alexa_test.rb
105
104
  - test/amazon_test.rb
106
105
  - test/apple_test.rb
107
106
  - test/botmatch_test.rb
@@ -132,15 +131,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
132
131
  requirements:
133
132
  - - ">="
134
133
  - !ruby/object:Gem::Version
135
- version: 2.6.0
134
+ version: 3.0.0
136
135
  required_rubygems_version: !ruby/object:Gem::Requirement
137
136
  requirements:
138
137
  - - ">="
139
138
  - !ruby/object:Gem::Version
140
139
  version: '0'
141
140
  requirements: []
142
- rubygems_version: 3.2.33
141
+ rubygems_version: 3.3.27
143
142
  signing_key:
144
143
  specification_version: 4
145
- summary: 'Validate requests from Web crawlers: impersonating or not?'
144
+ summary: Web crawler request validation
146
145
  test_files: []
@@ -1,66 +0,0 @@
1
- name: build
2
-
3
- on:
4
- pull_request:
5
- push:
6
-
7
- jobs:
8
- test:
9
- runs-on: ubuntu-latest
10
-
11
- strategy:
12
- fail-fast: false
13
- matrix:
14
- ruby: [ jruby, 3.0, 3.1, 3.2 ]
15
-
16
- steps:
17
- - uses: actions/checkout@v3
18
- - name: Set up Ruby
19
- uses: ruby/setup-ruby@v1
20
- with:
21
- ruby-version: ${{ matrix.ruby }}
22
- - name: Cache dependencies
23
- uses: actions/cache@v3
24
- with:
25
- path: vendor/bundle
26
- key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
27
- restore-keys: |
28
- ${{ runner.os }}-${{ matrix.ruby }}-gems-
29
- - name: Install dependencies
30
- run: |
31
- bundle config path vendor/bundle
32
- bundle install --jobs 4 --retry 3
33
- - name: Run tests
34
- run: bundle exec rake test
35
- - name: Upload coverage
36
- uses: codecov/codecov-action@v3
37
- with:
38
- files: coverage/coverage.xml
39
-
40
- lint:
41
- needs: test
42
- runs-on: ubuntu-latest
43
-
44
- strategy:
45
- matrix:
46
- ruby: [ 3.0 ]
47
-
48
- steps:
49
- - uses: actions/checkout@v3
50
- - name: Set up Ruby
51
- uses: ruby/setup-ruby@v1
52
- with:
53
- ruby-version: ${{ matrix.ruby }}
54
- - name: Cache dependencies
55
- uses: actions/cache@v3
56
- with:
57
- path: vendor/bundle
58
- key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
59
- restore-keys: |
60
- ${{ runner.os }}-${{ matrix.ruby }}-gems-
61
- - name: Install dependencies
62
- run: |
63
- bundle config path vendor/bundle
64
- bundle install --jobs 4 --retry 3
65
- - name: Run linter
66
- run: bundle exec rubocop
@@ -1,17 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Legitbot # :nodoc:
4
- # https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-
5
- # https://support.alexa.com/hc/en-us/articles/200462340
6
- # https://support.alexa.com/hc/en-us/articles/200450194
7
- class Alexa < BotMatch
8
- ip_ranges %w[
9
- 52.86.176.3
10
- 52.4.48.181
11
- 52.2.182.169
12
- 52.86.185.29
13
- ]
14
- end
15
-
16
- rule Legitbot::Alexa, %w[Alexabot ia_archiver]
17
- end
data/test/alexa_test.rb DELETED
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'test_helper'
4
-
5
- class AlexaTest < Minitest::Test
6
- include Minitest::Hooks
7
- include DnsServerMock
8
-
9
- def test_malicious_ip
10
- ip = '149.210.164.47'
11
- match = Legitbot::Alexa.new ip
12
-
13
- refute_predicate match, :valid?
14
- end
15
-
16
- def test_valid_ip
17
- ip = '52.86.176.3'
18
- match = Legitbot::Alexa.new ip
19
-
20
- assert_predicate match, :valid?
21
- end
22
-
23
- def test_malicious_ua
24
- bot = Legitbot.bot(
25
- 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
26
- '149.210.164.47'
27
- )
28
-
29
- assert bot
30
- refute_predicate bot, :valid?
31
- end
32
-
33
- def test_valid_ua
34
- bot = Legitbot.bot(
35
- 'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
36
- '52.86.176.3'
37
- )
38
-
39
- assert bot
40
- assert_predicate bot, :valid?
41
- end
42
- end