spidr 0.6.1 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +11 -0
  3. data/.github/workflows/ruby.yml +26 -0
  4. data/.gitignore +4 -5
  5. data/ChangeLog.md +19 -1
  6. data/Gemfile +7 -4
  7. data/LICENSE.txt +1 -1
  8. data/README.md +136 -79
  9. data/Rakefile +1 -0
  10. data/gemspec.yml +7 -0
  11. data/lib/spidr/agent/actions.rb +3 -1
  12. data/lib/spidr/agent/events.rb +3 -1
  13. data/lib/spidr/agent/filters.rb +57 -56
  14. data/lib/spidr/agent/robots.rb +2 -0
  15. data/lib/spidr/agent/sanitizers.rb +7 -8
  16. data/lib/spidr/agent.rb +232 -108
  17. data/lib/spidr/auth_credential.rb +2 -0
  18. data/lib/spidr/auth_store.rb +9 -7
  19. data/lib/spidr/cookie_jar.rb +7 -5
  20. data/lib/spidr/extensions/uri.rb +3 -1
  21. data/lib/spidr/extensions.rb +3 -1
  22. data/lib/spidr/page/content_types.rb +53 -0
  23. data/lib/spidr/page/cookies.rb +2 -0
  24. data/lib/spidr/page/html.rb +21 -20
  25. data/lib/spidr/page/status_codes.rb +15 -11
  26. data/lib/spidr/page.rb +3 -1
  27. data/lib/spidr/proxy.rb +8 -14
  28. data/lib/spidr/rules.rb +7 -8
  29. data/lib/spidr/session_cache.rb +26 -22
  30. data/lib/spidr/settings/proxy.rb +22 -6
  31. data/lib/spidr/settings/timeouts.rb +2 -0
  32. data/lib/spidr/settings/user_agent.rb +2 -0
  33. data/lib/spidr/settings.rb +5 -3
  34. data/lib/spidr/spidr.rb +22 -11
  35. data/lib/spidr/version.rb +3 -1
  36. data/lib/spidr.rb +5 -3
  37. data/spec/agent_spec.rb +356 -7
  38. data/spec/example_page.rb +2 -0
  39. data/spec/page/content_types_spec.rb +22 -0
  40. data/spec/page/html_spec.rb +255 -51
  41. data/spec/page/status_codes_spec.rb +4 -4
  42. data/spec/proxy_spec.rb +2 -2
  43. data/spec/settings/proxy_examples.rb +31 -11
  44. data/spec/spec_helper.rb +3 -0
  45. data/spidr.gemspec +1 -4
  46. metadata +8 -7
  47. data/.travis.yml +0 -16
data/lib/spidr/agent.rb CHANGED
@@ -1,14 +1,16 @@
1
- require 'spidr/settings/user_agent'
2
- require 'spidr/agent/sanitizers'
3
- require 'spidr/agent/filters'
4
- require 'spidr/agent/events'
5
- require 'spidr/agent/actions'
6
- require 'spidr/agent/robots'
7
- require 'spidr/page'
8
- require 'spidr/session_cache'
9
- require 'spidr/cookie_jar'
10
- require 'spidr/auth_store'
11
- require 'spidr/spidr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/user_agent'
4
+ require_relative 'agent/sanitizers'
5
+ require_relative 'agent/filters'
6
+ require_relative 'agent/events'
7
+ require_relative 'agent/actions'
8
+ require_relative 'agent/robots'
9
+ require_relative 'page'
10
+ require_relative 'session_cache'
11
+ require_relative 'cookie_jar'
12
+ require_relative 'auth_store'
13
+ require_relative 'spidr'
12
14
 
13
15
  require 'openssl'
14
16
  require 'net/http'
@@ -19,12 +21,12 @@ module Spidr
19
21
 
20
22
  include Settings::UserAgent
21
23
 
22
- # HTTP Host Header to use
24
+ # HTTP `Host` Header to use
23
25
  #
24
26
  # @return [String]
25
27
  attr_accessor :host_header
26
28
 
27
- # HTTP Host Headers to use for specific hosts
29
+ # HTTP `Host` Headers to use for specific hosts
28
30
  #
29
31
  # @return [Hash{String,Regexp => String}]
30
32
  attr_reader :host_headers
@@ -96,70 +98,110 @@ module Spidr
96
98
  #
97
99
  # Creates a new Agent object.
98
100
  #
99
- # @param [Hash] options
100
- # Additional options
101
+ # @param [String, nil] host_header
102
+ # The HTTP `Host` header to use with each request.
101
103
  #
102
- # @option options [Integer] :open_timeout (Spidr.open_timeout)
103
- # Optional open timeout.
104
+ # @param [Hash{String,Regexp => String}] host_headers
105
+ # The HTTP `Host` headers to use for specific hosts.
104
106
  #
105
- # @option options [Integer] :read_timeout (Spidr.read_timeout)
107
+ # @param [Hash{String => String}] default_headers
108
+ # Default headers to set for every request.
109
+ #
110
+ # @param [String, nil] user_agent
111
+ # The `User-Agent` string to send with each request.
112
+ #
113
+ # @param [String, nil] referer
114
+ # The `Referer` URL to send with each request.
115
+ #
116
+ # @param [Integer, nil] open_timeout
117
+ # Optional open connection timeout.
118
+ #
119
+ # @param [Integer, nil] read_timeout
106
120
  # Optional read timeout.
107
121
  #
108
- # @option options [Integer] :ssl_timeout (Spidr.ssl_timeout)
109
- # Optional ssl timeout.
122
+ # @param [Integer, nil] ssl_timeout
123
+ # Optional SSL connection timeout.
110
124
  #
111
- # @option options [Integer] :continue_timeout (Spidr.continue_timeout)
125
+ # @param [Integer, nil] continue_timeout
112
126
  # Optional continue timeout.
113
127
  #
114
- # @option options [Integer] :keep_alive_timeout (Spidr.keep_alive_timeout)
115
- # Optional keep_alive timeout.
128
+ # @param [Integer, nil] keep_alive_timeout
129
+ # Optional `Keep-Alive` timeout.
116
130
  #
117
- # @option options [Hash] :proxy (Spidr.proxy)
131
+ # @param [Spidr::Proxy, Hash, URI::HTTP, String, nil] proxy
118
132
  # The proxy information to use.
119
133
  #
120
- # @option :proxy [String] :host
134
+ # @option proxy [String] :host
121
135
  # The host the proxy is running on.
122
136
  #
123
- # @option :proxy [Integer] :port
137
+ # @option proxy [Integer] :port (8080)
124
138
  # The port the proxy is running on.
125
139
  #
126
- # @option :proxy [String] :user
140
+ # @option proxy [String, nil] :user
127
141
  # The user to authenticate as with the proxy.
128
142
  #
129
- # @option :proxy [String] :password
143
+ # @option proxy [String, nil] :password
130
144
  # The password to authenticate with.
131
145
  #
132
- # @option options [Hash{String => String}] :default_headers
133
- # Default headers to set for every request.
146
+ # @param [Integer] delay
147
+ # The number of seconds to pause between each request.
134
148
  #
135
- # @option options [String] :host_header
136
- # The HTTP Host header to use with each request.
149
+ # @param [Integer, nil] limit
150
+ # The maximum number of pages to visit.
137
151
  #
138
- # @option options [Hash{String,Regexp => String}] :host_headers
139
- # The HTTP Host headers to use for specific hosts.
152
+ # @param [Integer, nil] max_depth
153
+ # The maximum link depth to follow.
140
154
  #
141
- # @option options [String] :user_agent (Spidr.user_agent)
142
- # The User-Agent string to send with each requests.
155
+ # @param [Set, Array, nil] queue
156
+ # The initial queue of URLs to visit.
143
157
  #
144
- # @option options [String] :referer
145
- # The Referer URL to send with each request.
158
+ # @param [Set, Array, nil] history
159
+ # The initial list of visited URLs.
146
160
  #
147
- # @option options [Integer] :delay (0)
148
- # The number of seconds to pause between each request.
161
+ # @param [Boolean] strip_fragments
162
+ # Controls whether to strip the fragment components from the URLs.
149
163
  #
150
- # @option options [Set, Array] :queue
151
- # The initial queue of URLs to visit.
164
+ # @param [Boolean] strip_query
165
+ # Controls whether to strip the query components from the URLs.
152
166
  #
153
- # @option options [Set, Array] :history
154
- # The initial list of visited URLs.
167
+ # @param [Array<String>] schemes
168
+ # The list of acceptable URI schemes to visit.
169
+ # The `https` scheme will be ignored if `net/https` cannot be loaded.
155
170
  #
156
- # @option options [Integer] :limit
157
- # The maximum number of pages to visit.
171
+ # @param [String] host
172
+ # The host-name to visit.
158
173
  #
159
- # @option options [Integer] :max_depth
160
- # The maximum link depth to follow.
174
+ # @param [Array<String, Regexp, Proc>] hosts
175
+ # The patterns which match the host-names to visit.
161
176
  #
162
- # @option options [Boolean] :robots (Spidr.robots?)
177
+ # @param [Array<String, Regexp, Proc>] ignore_hosts
178
+ # The patterns which match the host-names to not visit.
179
+ #
180
+ # @param [Array<Integer, Regexp, Proc>] ports
181
+ # The patterns which match the ports to visit.
182
+ #
183
+ # @param [Array<Integer, Regexp, Proc>] ignore_ports
184
+ # The patterns which match the ports to not visit.
185
+ #
186
+ # @param [Array<String, Regexp, Proc>] links
187
+ # The patterns which match the links to visit.
188
+ #
189
+ # @param [Array<String, Regexp, Proc>] ignore_links
190
+ # The patterns which match the links to not visit.
191
+ #
192
+ # @param [Array<String, Regexp, Proc>] urls
193
+ # The patterns which match the URLs to visit.
194
+ #
195
+ # @param [Array<String, Regexp, Proc>] ignore_urls
196
+ # The patterns which match the URLs to not visit.
197
+ #
198
+ # @param [Array<String, Regexp, Proc>] exts
199
+ # The patterns which match the URI path extensions to visit.
200
+ #
201
+ # @param [Array<String, Regexp, Proc>] ignore_exts
202
+ # The patterns which match the URI path extensions to not visit.
203
+ #
204
+ # @param [Boolean] robots
163
205
  # Specifies whether `robots.txt` should be honored.
164
206
  #
165
207
  # @yield [agent]
@@ -169,58 +211,99 @@ module Spidr
169
211
  # @yieldparam [Agent] agent
170
212
  # The newly created agent.
171
213
  #
172
- # @see #initialize_sanitizers
173
- # @see #initialize_filters
174
- # @see #initialize_actions
175
- # @see #initialize_events
176
- #
177
- def initialize(options={})
178
- @host_header = options[:host_header]
179
- @host_headers = {}
180
-
181
- if options[:host_headers]
182
- @host_headers.merge!(options[:host_headers])
183
- end
184
-
185
- @default_headers = {}
186
-
187
- if options[:default_headers]
188
- @default_headers.merge!(options[:default_headers])
189
- end
190
-
191
- @user_agent = options.fetch(:user_agent,Spidr.user_agent)
192
- @referer = options[:referer]
193
-
194
- @sessions = SessionCache.new(options)
214
+ def initialize(# header keyword arguments
215
+ host_header: nil,
216
+ host_headers: {},
217
+ default_headers: {},
218
+ user_agent: Spidr.user_agent,
219
+ referer: nil,
220
+ # session cache keyword arguments
221
+ proxy: Spidr.proxy,
222
+ open_timeout: Spidr.open_timeout,
223
+ ssl_timeout: Spidr.ssl_timeout,
224
+ read_timeout: Spidr.read_timeout,
225
+ continue_timeout: Spidr.continue_timeout,
226
+ keep_alive_timeout: Spidr.keep_alive_timeout,
227
+ # spidering controls keyword arguments
228
+ delay: 0,
229
+ limit: nil,
230
+ max_depth: nil,
231
+ # history keyword arguments
232
+ queue: nil,
233
+ history: nil,
234
+ # sanitizer keyword arguments
235
+ strip_fragments: true,
236
+ strip_query: false,
237
+ # filtering keyword arguments
238
+ schemes: self.class.default_schemes,
239
+ host: nil,
240
+ hosts: nil,
241
+ ignore_hosts: nil,
242
+ ports: nil,
243
+ ignore_ports: nil,
244
+ links: nil,
245
+ ignore_links: nil,
246
+ urls: nil,
247
+ ignore_urls: nil,
248
+ exts: nil,
249
+ ignore_exts: nil,
250
+ # robots keyword arguments
251
+ robots: Spidr.robots?)
252
+ @host_header = host_header
253
+ @host_headers = host_headers
254
+
255
+ @default_headers = default_headers
256
+
257
+ @user_agent = user_agent
258
+ @referer = referer
259
+
260
+ @sessions = SessionCache.new(
261
+ proxy: proxy,
262
+ open_timeout: open_timeout,
263
+ ssl_timeout: ssl_timeout,
264
+ read_timeout: read_timeout,
265
+ continue_timeout: continue_timeout,
266
+ keep_alive_timeout: keep_alive_timeout
267
+ )
195
268
  @cookies = CookieJar.new
196
269
  @authorized = AuthStore.new
197
270
 
198
271
  @running = false
199
- @delay = options.fetch(:delay,0)
272
+ @delay = delay
200
273
  @history = Set[]
201
274
  @failures = Set[]
202
275
  @queue = []
203
276
 
204
- @limit = options[:limit]
277
+ @limit = limit
205
278
  @levels = Hash.new(0)
206
- @max_depth = options[:max_depth]
207
-
208
- if options[:queue]
209
- self.queue = options[:queue]
210
- end
211
-
212
- if options[:history]
213
- self.history = options[:history]
214
- end
215
-
216
- initialize_sanitizers(options)
217
- initialize_filters(options)
218
- initialize_actions(options)
219
- initialize_events(options)
220
-
221
- if options.fetch(:robots,Spidr.robots?)
222
- initialize_robots
223
- end
279
+ @max_depth = max_depth
280
+
281
+ self.queue = queue if queue
282
+ self.history = history if history
283
+
284
+ initialize_sanitizers(
285
+ strip_fragments: strip_fragments,
286
+ strip_query: strip_query
287
+ )
288
+
289
+ initialize_filters(
290
+ schemes: schemes,
291
+ host: host,
292
+ hosts: hosts,
293
+ ignore_hosts: ignore_hosts,
294
+ ports: ports,
295
+ ignore_ports: ignore_ports,
296
+ links: links,
297
+ ignore_links: ignore_links,
298
+ urls: urls,
299
+ ignore_urls: ignore_urls,
300
+ exts: exts,
301
+ ignore_exts: ignore_exts
302
+ )
303
+ initialize_actions
304
+ initialize_events
305
+
306
+ initialize_robots if robots
224
307
 
225
308
  yield self if block_given?
226
309
  end
@@ -231,8 +314,8 @@ module Spidr
231
314
  # @param [URI::HTTP, String] url
232
315
  # The URL to start spidering at.
233
316
  #
234
- # @param [Hash] options
235
- # Additional options. See {Agent#initialize}.
317
+ # @param [Hash{Symbol => Object}] kwargs
318
+ # Additional keyword arguments. See {Agent#initialize}.
236
319
  #
237
320
  # @yield [agent]
238
321
  # If a block is given, it will be passed the newly created agent
@@ -241,12 +324,16 @@ module Spidr
241
324
  # @yieldparam [Agent] agent
242
325
  # The newly created agent.
243
326
  #
327
+ # @return [Agent]
328
+ # The created agent object.
329
+ #
244
330
  # @see #initialize
245
331
  # @see #start_at
246
332
  #
247
- def self.start_at(url,options={},&block)
248
- agent = new(options,&block)
333
+ def self.start_at(url,**kwargs,&block)
334
+ agent = new(**kwargs,&block)
249
335
  agent.start_at(url)
336
+ return agent
250
337
  end
251
338
 
252
339
  #
@@ -255,8 +342,8 @@ module Spidr
255
342
  # @param [URI::HTTP, String] url
256
343
  # The web-site to spider.
257
344
  #
258
- # @param [Hash] options
259
- # Additional options. See {Agent#initialize}.
345
+ # @param [Hash{Symbol => Object}] kwargs
346
+ # Additional keyword arguments. See {Agent#initialize}.
260
347
  #
261
348
  # @yield [agent]
262
349
  # If a block is given, it will be passed the newly created agent
@@ -265,13 +352,17 @@ module Spidr
265
352
  # @yieldparam [Agent] agent
266
353
  # The newly created agent.
267
354
  #
355
+ # @return [Agent]
356
+ # The created agent object.
357
+ #
268
358
  # @see #initialize
269
359
  #
270
- def self.site(url,options={},&block)
360
+ def self.site(url,**kwargs,&block)
271
361
  url = URI(url)
272
362
 
273
- agent = new(options.merge(host: url.host),&block)
363
+ agent = new(host: url.host, **kwargs, &block)
274
364
  agent.start_at(url)
365
+ return agent
275
366
  end
276
367
 
277
368
  #
@@ -280,8 +371,35 @@ module Spidr
280
371
  # @param [String] name
281
372
  # The host-name to spider.
282
373
  #
283
- # @param [Hash] options
284
- # Additional options. See {Agent#initialize}.
374
+ # @param [Hash{Symbol => Object}] kwargs
375
+ # Additional keyword arguments. See {Agent#initialize}.
376
+ #
377
+ # @yield [agent]
378
+ # If a block is given, it will be passed the newly created agent
379
+ # before it begins spidering.
380
+ #
381
+ # @yieldparam [Agent] agent
382
+ # The newly created agent.
383
+ #
384
+ # @return [Agent]
385
+ # The created agent object.
386
+ #
387
+ # @see #initialize
388
+ #
389
+ def self.host(name,**kwargs,&block)
390
+ agent = new(host: name, **kwargs, &block)
391
+ agent.start_at(URI::HTTP.build(host: name, path: '/'))
392
+ return agent
393
+ end
394
+
395
+ #
396
+ # Creates a new agent and spiders the entire domain.
397
+ #
398
+ # @param [String] name
399
+ # The top-level domain to spider.
400
+ #
401
+ # @param [Hash{Symbol => Object}] kwargs
402
+ # Additional keyword arguments. See {Agent#initialize}.
285
403
  #
286
404
  # @yield [agent]
287
405
  # If a block is given, it will be passed the newly created agent
@@ -290,11 +408,17 @@ module Spidr
290
408
  # @yieldparam [Agent] agent
291
409
  # The newly created agent.
292
410
  #
411
+ # @return [Agent]
412
+ # The created agent object.
413
+ #
293
414
  # @see #initialize
294
415
  #
295
- def self.host(name,options={},&block)
296
- agent = new(options.merge(host: name),&block)
416
+ # @since 0.7.0
417
+ #
418
+ def self.domain(name,**kwargs,&block)
419
+ agent = new(host: /(^|\.)#{Regexp.escape(name)}$/, **kwargs, &block)
297
420
  agent.start_at(URI::HTTP.build(host: name, path: '/'))
421
+ return agent
298
422
  end
299
423
 
300
424
  #
@@ -314,10 +438,10 @@ module Spidr
314
438
  #
315
439
  # Sets the proxy information that the agent uses.
316
440
  #
317
- # @param [Proxy] new_proxy
441
+ # @param [Proxy, Hash, URI::HTTP, String, nil] new_proxy
318
442
  # The new proxy information.
319
443
  #
320
- # @return [Hash]
444
+ # @return [Proxy]
321
445
  # The new proxy information.
322
446
  #
323
447
  # @see SessionCache#proxy=
@@ -534,7 +658,7 @@ module Spidr
534
658
  def enqueue(url,level=0)
535
659
  url = sanitize_url(url)
536
660
 
537
- if (!(queued?(url)) && visit?(url))
661
+ if (!queued?(url) && visit?(url))
538
662
  link = url.to_s
539
663
 
540
664
  begin
@@ -633,7 +757,7 @@ module Spidr
633
757
  end
634
758
 
635
759
  #
636
- # Visits a given URL, and enqueus the links recovered from the URL
760
+ # Visits a given URL, and enqueues the links recovered from the URL
637
761
  # to be visited later.
638
762
  #
639
763
  # @param [URI::HTTP, String] url
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # Represents HTTP Authentication credentials for a website.
@@ -1,6 +1,8 @@
1
- require 'spidr/extensions/uri'
2
- require 'spidr/auth_credential'
3
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
4
+ require_relative 'auth_credential'
5
+ require_relative 'page'
4
6
 
5
7
  require 'base64'
6
8
 
@@ -20,7 +22,7 @@ module Spidr
20
22
  @credentials = {}
21
23
  end
22
24
 
23
- #
25
+ #
24
26
  # Given a URL, return the most specific matching auth credential.
25
27
  #
26
28
  # @param [URI] url
@@ -54,7 +56,7 @@ module Spidr
54
56
  return nil
55
57
  end
56
58
 
57
- #
59
+ #
58
60
  # Add an auth credential to the store for supplied base URL.
59
61
  #
60
62
  # @param [URI] url
@@ -109,7 +111,7 @@ module Spidr
109
111
  # or `nil` if no authorization exists.
110
112
  #
111
113
  # @param [URI] url
112
- # The url.
114
+ # The URL.
113
115
  #
114
116
  # @return [String, nil]
115
117
  # The base64 encoded authorization string or `nil`.
@@ -122,7 +124,7 @@ module Spidr
122
124
  end
123
125
  end
124
126
 
125
- #
127
+ #
126
128
  # Clear the contents of the auth store.
127
129
  #
128
130
  # @return [AuthStore]
@@ -1,4 +1,6 @@
1
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'page'
2
4
 
3
5
  require 'set'
4
6
 
@@ -42,8 +44,8 @@ module Spidr
42
44
  @params.each(&block)
43
45
  end
44
46
 
45
- #
46
- # Return all relevant cookies in a single string for the
47
+ #
48
+ # Return all relevant cookies in a single string for the
47
49
  # named host or domain (in browser request format).
48
50
  #
49
51
  # @param [String] host
@@ -59,7 +61,7 @@ module Spidr
59
61
  @params[host] ||= {}
60
62
  end
61
63
 
62
- #
64
+ #
63
65
  # Add a cookie to the jar for a particular domain.
64
66
  #
65
67
  # @param [String] host
@@ -166,7 +168,7 @@ module Spidr
166
168
  return host_cookies
167
169
  end
168
170
 
169
- #
171
+ #
170
172
  # Clear out the jar, removing all stored cookies.
171
173
  #
172
174
  # @since 0.2.2
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
  require 'strscan'
3
5
 
@@ -58,7 +60,7 @@ module URI
58
60
  unless stack.empty?
59
61
  "#{leading_slash}#{stack.join('/')}#{trailing_slash}"
60
62
  else
61
- '/'
63
+ String.new('/')
62
64
  end
63
65
  end
64
66
  end
@@ -1 +1,3 @@
1
- require 'spidr/extensions/uri'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Page
3
5
  #
@@ -221,5 +223,56 @@ module Spidr
221
223
  def zip?
222
224
  is_content_type?('application/zip')
223
225
  end
226
+
227
+ #
228
+ # Determines if the page is a PNG image.
229
+ #
230
+ # @return [Boolean]
231
+ # Specifies whether the page is a PNG image.
232
+ #
233
+ # @since 0.7.0
234
+ #
235
+ def png?
236
+ is_content_type?('image/png')
237
+ end
238
+
239
+ #
240
+ # Determines if the page is a GIF image.
241
+ #
242
+ # @return [Boolean]
243
+ # Specifies whether the page is a GIF image.
244
+ #
245
+ # @since 0.7.0
246
+ #
247
+ def gif?
248
+ is_content_type?('image/gif')
249
+ end
250
+
251
+ #
252
+ # Determines if the page is a JPEG image.
253
+ #
254
+ # @return [Boolean]
255
+ # Specifies whether the page is a JPEG image.
256
+ #
257
+ # @since 0.7.0
258
+ #
259
+ def jpeg?
260
+ is_content_type?('image/jpeg')
261
+ end
262
+
263
+ #
264
+ # Determines if the page is an ICO image.
265
+ #
266
+ # @return [Boolean]
267
+ # Specifies whether the page is an ICO image.
268
+ #
269
+ # @since 0.7.0
270
+ #
271
+ def ico?
272
+ is_content_type?('image/x-icon') ||
273
+ is_content_type?('image/vnd.microsoft.icon')
274
+ end
275
+
276
+ alias icon? ico?
224
277
  end
225
278
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
 
3
5
  module Spidr