legitbot 1.7.3 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1eb23292d664c393d5b39a9d7a69b0469dbf326f1c5ef2cda2236cd16baf6eda
4
- data.tar.gz: fefe1fd716e81ff8e0f38950be0790006cc84bb9b9c4b1a53997331010efc861
3
+ metadata.gz: a7cde94cf9e8a396867e4e97a490c1b4da0b300652e619ac200477a5e3aed1d5
4
+ data.tar.gz: 1d505df51aa086231f85080fcf3a60291a660cfce3ce79bdeab0591ba9d27578
5
5
  SHA512:
6
- metadata.gz: 7d156a7574284f8b401f6bcf78bb3f005f6f8c4ad11e67c9f30b515ed08d7855481052716cc2dd60f1eb33f19702b5b6bf8a5bd42b56c7907bc24d791cc4fa63
7
- data.tar.gz: b33800ac92eaa874a4b74372281b3d512f34b6ab18e7743ef9857237508188ab92fdca1b554a0307397cf63d898f69c23f02462adbe8d52e0a58011cbda9ec4f
6
+ metadata.gz: 41f811fd8c20c9a442218e36a8e54e1e72731b8443c6141205b5f3b7accbdbaeb85491134d2ca3f4cb817d01dc774348c52dae705a7655d98345f975782b5c4a
7
+ data.tar.gz: 77912f09be50c5d868099a6ecc2402b582d17039a82b9a57ee790724bf3b99123d7b5db071928f944c814afcc226818530b953c62f31d6f4a5d1df6bfdcefc54
data/Gemfile CHANGED
@@ -2,3 +2,16 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
  gemspec
5
+
6
+ group :development do
7
+ gem 'bump'
8
+ gem 'dns_mock'
9
+ gem 'jsonpath'
10
+ gem 'minitest'
11
+ gem 'minitest-hooks'
12
+ gem 'nokogiri'
13
+ gem 'rake'
14
+ gem 'rubocop'
15
+ gem 'rubocop-minitest'
16
+ gem 'simplecov-cobertura'
17
+ end
data/README.md CHANGED
@@ -57,6 +57,7 @@ end
57
57
  * [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
58
58
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
59
59
  * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
60
+ * [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
60
61
  * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
61
62
  * [Petal search engine](http://aspiegel.com/petalbot)
62
63
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
data/legitbot.gemspec CHANGED
@@ -22,15 +22,6 @@ Gem::Specification.new do |spec|
22
22
  spec.required_ruby_version = '>= 2.6.0'
23
23
  spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
24
24
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
25
- spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
26
- spec.add_development_dependency 'dns_mock', '~> 1.5.0', '>= 1.5.0'
27
- spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
28
- spec.add_development_dependency 'minitest-hooks', '~> 1.5', '>= 1.5.0'
29
- spec.add_development_dependency 'nokogiri', '~> 1.14', '>= 1.14.3'
30
- spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
31
- spec.add_development_dependency 'rubocop', '~> 1.50.0', '>= 1.50.0'
32
- spec.add_development_dependency 'rubocop-minitest', '~> 0.31.0', '>= 0.31.0'
33
- spec.add_development_dependency 'simplecov-cobertura', '~> 2.0', '>= 2.0'
34
25
 
35
26
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
36
27
  spec.rdoc_options = ['--charset=UTF-8']
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/
5
+ class Ias < BotMatch
6
+ # @fetch:url https://integralads.com/policy-docs/iasbot.json
7
+ # @fetch:jsonpath $.publicIPs[*].ipv4
8
+ ip_ranges %w[
9
+ 3.217.168.199
10
+ 3.226.51.67
11
+ 18.214.43.70
12
+ ]
13
+ end
14
+
15
+ rule Legitbot::Ias, %w[ias_crawler ias_wombles]
16
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.7.3'
4
+ VERSION = '1.8.0'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -12,6 +12,7 @@ require_relative 'legitbot/bing'
12
12
  require_relative 'legitbot/duckduckgo'
13
13
  require_relative 'legitbot/facebook'
14
14
  require_relative 'legitbot/google'
15
+ require_relative 'legitbot/ias'
15
16
  require_relative 'legitbot/oracle'
16
17
  require_relative 'legitbot/petalbot'
17
18
  require_relative 'legitbot/pinterest'
@@ -3,6 +3,7 @@
3
3
  require 'ipaddr'
4
4
  require 'net/http'
5
5
  require 'nokogiri'
6
+ require 'jsonpath'
6
7
  require 'rubocop'
7
8
  require 'uri'
8
9
 
@@ -34,12 +35,17 @@ module RuboCop
34
35
 
35
36
  private
36
37
 
37
- def fetch_ips(url:, selector:)
38
+ def fetch_ips(url:, selector: nil, jsonpath: nil)
38
39
  response = Net::HTTP.get_response URI(url)
39
40
  response.value
40
41
 
41
- document = Nokogiri::HTML response.body
42
- document.css(selector).map(&:content).sort_by(&IPAddr.method(:new))
42
+ if selector
43
+ document = Nokogiri::HTML response.body
44
+ document.css(selector).map(&:content).sort_by(&IPAddr.method(:new))
45
+ else
46
+ document = JSON.parse response.body
47
+ JsonPath.new(jsonpath).on(document).sort_by(&IPAddr.method(:new))
48
+ end
43
49
  end
44
50
 
45
51
  def read_node_ips(value)
@@ -54,7 +60,7 @@ module RuboCop
54
60
  end
55
61
 
56
62
  def mandatory_params?(params)
57
- params.include?(:url) && params.include?(:selector)
63
+ params.include?(:url) && (params.include?(:selector) || params.include?(:jsonpath))
58
64
  end
59
65
 
60
66
  def fetch_params(node)
data/test/ias_test.rb ADDED
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class IasTest < Minitest::Test
6
+ def test_malicious_ip
7
+ ip = '149.210.164.47'
8
+ match = Legitbot::Ias.new ip
9
+
10
+ refute_predicate match, :valid?
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '18.214.43.70'
15
+ match = Legitbot::Ias.new ip
16
+
17
+ assert_predicate match, :valid?
18
+ end
19
+
20
+ def test_malicious_ua
21
+ bot = Legitbot.bot(
22
+ 'IAS Crawler (ias_crawler; http://integralads.com/site-indexing-policy/)',
23
+ '18.214.43.72'
24
+ )
25
+
26
+ assert bot
27
+ refute_predicate bot, :valid?
28
+ end
29
+
30
+ def test_valid_ua
31
+ bot = Legitbot.bot(
32
+ 'IAS Crawler (ias_crawler; http://integralads.com/site-indexing-policy/)',
33
+ '18.214.43.70'
34
+ )
35
+
36
+ assert bot
37
+ assert_predicate bot, :valid?
38
+ end
39
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.3
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-06 00:00:00.000000000 Z
11
+ date: 2023-07-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -50,186 +50,6 @@ dependencies:
50
50
  - - ">="
51
51
  - !ruby/object:Gem::Version
52
52
  version: 0.2.1
53
- - !ruby/object:Gem::Dependency
54
- name: bump
55
- requirement: !ruby/object:Gem::Requirement
56
- requirements:
57
- - - "~>"
58
- - !ruby/object:Gem::Version
59
- version: '0.8'
60
- - - ">="
61
- - !ruby/object:Gem::Version
62
- version: 0.8.0
63
- type: :development
64
- prerelease: false
65
- version_requirements: !ruby/object:Gem::Requirement
66
- requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '0.8'
70
- - - ">="
71
- - !ruby/object:Gem::Version
72
- version: 0.8.0
73
- - !ruby/object:Gem::Dependency
74
- name: dns_mock
75
- requirement: !ruby/object:Gem::Requirement
76
- requirements:
77
- - - "~>"
78
- - !ruby/object:Gem::Version
79
- version: 1.5.0
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: 1.5.0
83
- type: :development
84
- prerelease: false
85
- version_requirements: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - "~>"
88
- - !ruby/object:Gem::Version
89
- version: 1.5.0
90
- - - ">="
91
- - !ruby/object:Gem::Version
92
- version: 1.5.0
93
- - !ruby/object:Gem::Dependency
94
- name: minitest
95
- requirement: !ruby/object:Gem::Requirement
96
- requirements:
97
- - - "~>"
98
- - !ruby/object:Gem::Version
99
- version: '5.1'
100
- - - ">="
101
- - !ruby/object:Gem::Version
102
- version: 5.1.0
103
- type: :development
104
- prerelease: false
105
- version_requirements: !ruby/object:Gem::Requirement
106
- requirements:
107
- - - "~>"
108
- - !ruby/object:Gem::Version
109
- version: '5.1'
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- version: 5.1.0
113
- - !ruby/object:Gem::Dependency
114
- name: minitest-hooks
115
- requirement: !ruby/object:Gem::Requirement
116
- requirements:
117
- - - "~>"
118
- - !ruby/object:Gem::Version
119
- version: '1.5'
120
- - - ">="
121
- - !ruby/object:Gem::Version
122
- version: 1.5.0
123
- type: :development
124
- prerelease: false
125
- version_requirements: !ruby/object:Gem::Requirement
126
- requirements:
127
- - - "~>"
128
- - !ruby/object:Gem::Version
129
- version: '1.5'
130
- - - ">="
131
- - !ruby/object:Gem::Version
132
- version: 1.5.0
133
- - !ruby/object:Gem::Dependency
134
- name: nokogiri
135
- requirement: !ruby/object:Gem::Requirement
136
- requirements:
137
- - - "~>"
138
- - !ruby/object:Gem::Version
139
- version: '1.14'
140
- - - ">="
141
- - !ruby/object:Gem::Version
142
- version: 1.14.3
143
- type: :development
144
- prerelease: false
145
- version_requirements: !ruby/object:Gem::Requirement
146
- requirements:
147
- - - "~>"
148
- - !ruby/object:Gem::Version
149
- version: '1.14'
150
- - - ">="
151
- - !ruby/object:Gem::Version
152
- version: 1.14.3
153
- - !ruby/object:Gem::Dependency
154
- name: rake
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - "~>"
158
- - !ruby/object:Gem::Version
159
- version: '13.0'
160
- - - ">="
161
- - !ruby/object:Gem::Version
162
- version: 13.0.0
163
- type: :development
164
- prerelease: false
165
- version_requirements: !ruby/object:Gem::Requirement
166
- requirements:
167
- - - "~>"
168
- - !ruby/object:Gem::Version
169
- version: '13.0'
170
- - - ">="
171
- - !ruby/object:Gem::Version
172
- version: 13.0.0
173
- - !ruby/object:Gem::Dependency
174
- name: rubocop
175
- requirement: !ruby/object:Gem::Requirement
176
- requirements:
177
- - - "~>"
178
- - !ruby/object:Gem::Version
179
- version: 1.50.0
180
- - - ">="
181
- - !ruby/object:Gem::Version
182
- version: 1.50.0
183
- type: :development
184
- prerelease: false
185
- version_requirements: !ruby/object:Gem::Requirement
186
- requirements:
187
- - - "~>"
188
- - !ruby/object:Gem::Version
189
- version: 1.50.0
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- version: 1.50.0
193
- - !ruby/object:Gem::Dependency
194
- name: rubocop-minitest
195
- requirement: !ruby/object:Gem::Requirement
196
- requirements:
197
- - - "~>"
198
- - !ruby/object:Gem::Version
199
- version: 0.31.0
200
- - - ">="
201
- - !ruby/object:Gem::Version
202
- version: 0.31.0
203
- type: :development
204
- prerelease: false
205
- version_requirements: !ruby/object:Gem::Requirement
206
- requirements:
207
- - - "~>"
208
- - !ruby/object:Gem::Version
209
- version: 0.31.0
210
- - - ">="
211
- - !ruby/object:Gem::Version
212
- version: 0.31.0
213
- - !ruby/object:Gem::Dependency
214
- name: simplecov-cobertura
215
- requirement: !ruby/object:Gem::Requirement
216
- requirements:
217
- - - "~>"
218
- - !ruby/object:Gem::Version
219
- version: '2.0'
220
- - - ">="
221
- - !ruby/object:Gem::Version
222
- version: '2.0'
223
- type: :development
224
- prerelease: false
225
- version_requirements: !ruby/object:Gem::Requirement
226
- requirements:
227
- - - "~>"
228
- - !ruby/object:Gem::Version
229
- version: '2.0'
230
- - - ">="
231
- - !ruby/object:Gem::Version
232
- version: '2.0'
233
53
  description: Does Web request come from a real search engine or from an impersonating
234
54
  agent?
235
55
  email: self@alaz.me
@@ -261,6 +81,7 @@ files:
261
81
  - lib/legitbot/duckduckgo.rb
262
82
  - lib/legitbot/facebook.rb
263
83
  - lib/legitbot/google.rb
84
+ - lib/legitbot/ias.rb
264
85
  - lib/legitbot/legitbot.rb
265
86
  - lib/legitbot/oracle.rb
266
87
  - lib/legitbot/petalbot.rb
@@ -284,6 +105,7 @@ files:
284
105
  - test/botmatch_test.rb
285
106
  - test/facebook_test.rb
286
107
  - test/google_test.rb
108
+ - test/ias_test.rb
287
109
  - test/legitbot/validators/domains_test.rb
288
110
  - test/legitbot/validators/ip_ranges_test.rb
289
111
  - test/legitbot_test.rb