legitbot 1.7.3 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1eb23292d664c393d5b39a9d7a69b0469dbf326f1c5ef2cda2236cd16baf6eda
4
- data.tar.gz: fefe1fd716e81ff8e0f38950be0790006cc84bb9b9c4b1a53997331010efc861
3
+ metadata.gz: a7cde94cf9e8a396867e4e97a490c1b4da0b300652e619ac200477a5e3aed1d5
4
+ data.tar.gz: 1d505df51aa086231f85080fcf3a60291a660cfce3ce79bdeab0591ba9d27578
5
5
  SHA512:
6
- metadata.gz: 7d156a7574284f8b401f6bcf78bb3f005f6f8c4ad11e67c9f30b515ed08d7855481052716cc2dd60f1eb33f19702b5b6bf8a5bd42b56c7907bc24d791cc4fa63
7
- data.tar.gz: b33800ac92eaa874a4b74372281b3d512f34b6ab18e7743ef9857237508188ab92fdca1b554a0307397cf63d898f69c23f02462adbe8d52e0a58011cbda9ec4f
6
+ metadata.gz: 41f811fd8c20c9a442218e36a8e54e1e72731b8443c6141205b5f3b7accbdbaeb85491134d2ca3f4cb817d01dc774348c52dae705a7655d98345f975782b5c4a
7
+ data.tar.gz: 77912f09be50c5d868099a6ecc2402b582d17039a82b9a57ee790724bf3b99123d7b5db071928f944c814afcc226818530b953c62f31d6f4a5d1df6bfdcefc54
data/Gemfile CHANGED
@@ -2,3 +2,16 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
  gemspec
5
+
6
+ group :development do
7
+ gem 'bump'
8
+ gem 'dns_mock'
9
+ gem 'jsonpath'
10
+ gem 'minitest'
11
+ gem 'minitest-hooks'
12
+ gem 'nokogiri'
13
+ gem 'rake'
14
+ gem 'rubocop'
15
+ gem 'rubocop-minitest'
16
+ gem 'simplecov-cobertura'
17
+ end
data/README.md CHANGED
@@ -57,6 +57,7 @@ end
57
57
  * [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
58
58
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
59
59
  * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
60
+ * [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
60
61
  * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
61
62
  * [Petal search engine](http://aspiegel.com/petalbot)
62
63
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
data/legitbot.gemspec CHANGED
@@ -22,15 +22,6 @@ Gem::Specification.new do |spec|
22
22
  spec.required_ruby_version = '>= 2.6.0'
23
23
  spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
24
24
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
25
- spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
26
- spec.add_development_dependency 'dns_mock', '~> 1.5.0', '>= 1.5.0'
27
- spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
28
- spec.add_development_dependency 'minitest-hooks', '~> 1.5', '>= 1.5.0'
29
- spec.add_development_dependency 'nokogiri', '~> 1.14', '>= 1.14.3'
30
- spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
31
- spec.add_development_dependency 'rubocop', '~> 1.50.0', '>= 1.50.0'
32
- spec.add_development_dependency 'rubocop-minitest', '~> 0.31.0', '>= 0.31.0'
33
- spec.add_development_dependency 'simplecov-cobertura', '~> 2.0', '>= 2.0'
34
25
 
35
26
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
36
27
  spec.rdoc_options = ['--charset=UTF-8']
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/
5
+ class Ias < BotMatch
6
+ # @fetch:url https://integralads.com/policy-docs/iasbot.json
7
+ # @fetch:jsonpath $.publicIPs[*].ipv4
8
+ ip_ranges %w[
9
+ 3.217.168.199
10
+ 3.226.51.67
11
+ 18.214.43.70
12
+ ]
13
+ end
14
+
15
+ rule Legitbot::Ias, %w[ias_crawler ias_wombles]
16
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.7.3'
4
+ VERSION = '1.8.0'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -12,6 +12,7 @@ require_relative 'legitbot/bing'
12
12
  require_relative 'legitbot/duckduckgo'
13
13
  require_relative 'legitbot/facebook'
14
14
  require_relative 'legitbot/google'
15
+ require_relative 'legitbot/ias'
15
16
  require_relative 'legitbot/oracle'
16
17
  require_relative 'legitbot/petalbot'
17
18
  require_relative 'legitbot/pinterest'
@@ -3,6 +3,7 @@
3
3
  require 'ipaddr'
4
4
  require 'net/http'
5
5
  require 'nokogiri'
6
+ require 'jsonpath'
6
7
  require 'rubocop'
7
8
  require 'uri'
8
9
 
@@ -34,12 +35,17 @@ module RuboCop
34
35
 
35
36
  private
36
37
 
37
- def fetch_ips(url:, selector:)
38
+ def fetch_ips(url:, selector: nil, jsonpath: nil)
38
39
  response = Net::HTTP.get_response URI(url)
39
40
  response.value
40
41
 
41
- document = Nokogiri::HTML response.body
42
- document.css(selector).map(&:content).sort_by(&IPAddr.method(:new))
42
+ if selector
43
+ document = Nokogiri::HTML response.body
44
+ document.css(selector).map(&:content).sort_by(&IPAddr.method(:new))
45
+ else
46
+ document = JSON.parse response.body
47
+ JsonPath.new(jsonpath).on(document).sort_by(&IPAddr.method(:new))
48
+ end
43
49
  end
44
50
 
45
51
  def read_node_ips(value)
@@ -54,7 +60,7 @@ module RuboCop
54
60
  end
55
61
 
56
62
  def mandatory_params?(params)
57
- params.include?(:url) && params.include?(:selector)
63
+ params.include?(:url) && (params.include?(:selector) || params.include?(:jsonpath))
58
64
  end
59
65
 
60
66
  def fetch_params(node)
data/test/ias_test.rb ADDED
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class IasTest < Minitest::Test
6
+ def test_malicious_ip
7
+ ip = '149.210.164.47'
8
+ match = Legitbot::Ias.new ip
9
+
10
+ refute_predicate match, :valid?
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '18.214.43.70'
15
+ match = Legitbot::Ias.new ip
16
+
17
+ assert_predicate match, :valid?
18
+ end
19
+
20
+ def test_malicious_ua
21
+ bot = Legitbot.bot(
22
+ 'IAS Crawler (ias_crawler; http://integralads.com/site-indexing-policy/)',
23
+ '18.214.43.72'
24
+ )
25
+
26
+ assert bot
27
+ refute_predicate bot, :valid?
28
+ end
29
+
30
+ def test_valid_ua
31
+ bot = Legitbot.bot(
32
+ 'IAS Crawler (ias_crawler; http://integralads.com/site-indexing-policy/)',
33
+ '18.214.43.70'
34
+ )
35
+
36
+ assert bot
37
+ assert_predicate bot, :valid?
38
+ end
39
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.3
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-06 00:00:00.000000000 Z
11
+ date: 2023-07-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -50,186 +50,6 @@ dependencies:
50
50
  - - ">="
51
51
  - !ruby/object:Gem::Version
52
52
  version: 0.2.1
53
- - !ruby/object:Gem::Dependency
54
- name: bump
55
- requirement: !ruby/object:Gem::Requirement
56
- requirements:
57
- - - "~>"
58
- - !ruby/object:Gem::Version
59
- version: '0.8'
60
- - - ">="
61
- - !ruby/object:Gem::Version
62
- version: 0.8.0
63
- type: :development
64
- prerelease: false
65
- version_requirements: !ruby/object:Gem::Requirement
66
- requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '0.8'
70
- - - ">="
71
- - !ruby/object:Gem::Version
72
- version: 0.8.0
73
- - !ruby/object:Gem::Dependency
74
- name: dns_mock
75
- requirement: !ruby/object:Gem::Requirement
76
- requirements:
77
- - - "~>"
78
- - !ruby/object:Gem::Version
79
- version: 1.5.0
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: 1.5.0
83
- type: :development
84
- prerelease: false
85
- version_requirements: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - "~>"
88
- - !ruby/object:Gem::Version
89
- version: 1.5.0
90
- - - ">="
91
- - !ruby/object:Gem::Version
92
- version: 1.5.0
93
- - !ruby/object:Gem::Dependency
94
- name: minitest
95
- requirement: !ruby/object:Gem::Requirement
96
- requirements:
97
- - - "~>"
98
- - !ruby/object:Gem::Version
99
- version: '5.1'
100
- - - ">="
101
- - !ruby/object:Gem::Version
102
- version: 5.1.0
103
- type: :development
104
- prerelease: false
105
- version_requirements: !ruby/object:Gem::Requirement
106
- requirements:
107
- - - "~>"
108
- - !ruby/object:Gem::Version
109
- version: '5.1'
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- version: 5.1.0
113
- - !ruby/object:Gem::Dependency
114
- name: minitest-hooks
115
- requirement: !ruby/object:Gem::Requirement
116
- requirements:
117
- - - "~>"
118
- - !ruby/object:Gem::Version
119
- version: '1.5'
120
- - - ">="
121
- - !ruby/object:Gem::Version
122
- version: 1.5.0
123
- type: :development
124
- prerelease: false
125
- version_requirements: !ruby/object:Gem::Requirement
126
- requirements:
127
- - - "~>"
128
- - !ruby/object:Gem::Version
129
- version: '1.5'
130
- - - ">="
131
- - !ruby/object:Gem::Version
132
- version: 1.5.0
133
- - !ruby/object:Gem::Dependency
134
- name: nokogiri
135
- requirement: !ruby/object:Gem::Requirement
136
- requirements:
137
- - - "~>"
138
- - !ruby/object:Gem::Version
139
- version: '1.14'
140
- - - ">="
141
- - !ruby/object:Gem::Version
142
- version: 1.14.3
143
- type: :development
144
- prerelease: false
145
- version_requirements: !ruby/object:Gem::Requirement
146
- requirements:
147
- - - "~>"
148
- - !ruby/object:Gem::Version
149
- version: '1.14'
150
- - - ">="
151
- - !ruby/object:Gem::Version
152
- version: 1.14.3
153
- - !ruby/object:Gem::Dependency
154
- name: rake
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - "~>"
158
- - !ruby/object:Gem::Version
159
- version: '13.0'
160
- - - ">="
161
- - !ruby/object:Gem::Version
162
- version: 13.0.0
163
- type: :development
164
- prerelease: false
165
- version_requirements: !ruby/object:Gem::Requirement
166
- requirements:
167
- - - "~>"
168
- - !ruby/object:Gem::Version
169
- version: '13.0'
170
- - - ">="
171
- - !ruby/object:Gem::Version
172
- version: 13.0.0
173
- - !ruby/object:Gem::Dependency
174
- name: rubocop
175
- requirement: !ruby/object:Gem::Requirement
176
- requirements:
177
- - - "~>"
178
- - !ruby/object:Gem::Version
179
- version: 1.50.0
180
- - - ">="
181
- - !ruby/object:Gem::Version
182
- version: 1.50.0
183
- type: :development
184
- prerelease: false
185
- version_requirements: !ruby/object:Gem::Requirement
186
- requirements:
187
- - - "~>"
188
- - !ruby/object:Gem::Version
189
- version: 1.50.0
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- version: 1.50.0
193
- - !ruby/object:Gem::Dependency
194
- name: rubocop-minitest
195
- requirement: !ruby/object:Gem::Requirement
196
- requirements:
197
- - - "~>"
198
- - !ruby/object:Gem::Version
199
- version: 0.31.0
200
- - - ">="
201
- - !ruby/object:Gem::Version
202
- version: 0.31.0
203
- type: :development
204
- prerelease: false
205
- version_requirements: !ruby/object:Gem::Requirement
206
- requirements:
207
- - - "~>"
208
- - !ruby/object:Gem::Version
209
- version: 0.31.0
210
- - - ">="
211
- - !ruby/object:Gem::Version
212
- version: 0.31.0
213
- - !ruby/object:Gem::Dependency
214
- name: simplecov-cobertura
215
- requirement: !ruby/object:Gem::Requirement
216
- requirements:
217
- - - "~>"
218
- - !ruby/object:Gem::Version
219
- version: '2.0'
220
- - - ">="
221
- - !ruby/object:Gem::Version
222
- version: '2.0'
223
- type: :development
224
- prerelease: false
225
- version_requirements: !ruby/object:Gem::Requirement
226
- requirements:
227
- - - "~>"
228
- - !ruby/object:Gem::Version
229
- version: '2.0'
230
- - - ">="
231
- - !ruby/object:Gem::Version
232
- version: '2.0'
233
53
  description: Does Web request come from a real search engine or from an impersonating
234
54
  agent?
235
55
  email: self@alaz.me
@@ -261,6 +81,7 @@ files:
261
81
  - lib/legitbot/duckduckgo.rb
262
82
  - lib/legitbot/facebook.rb
263
83
  - lib/legitbot/google.rb
84
+ - lib/legitbot/ias.rb
264
85
  - lib/legitbot/legitbot.rb
265
86
  - lib/legitbot/oracle.rb
266
87
  - lib/legitbot/petalbot.rb
@@ -284,6 +105,7 @@ files:
284
105
  - test/botmatch_test.rb
285
106
  - test/facebook_test.rb
286
107
  - test/google_test.rb
108
+ - test/ias_test.rb
287
109
  - test/legitbot/validators/domains_test.rb
288
110
  - test/legitbot/validators/ip_ranges_test.rb
289
111
  - test/legitbot_test.rb